From ed7e1f97703ad562314e92785a20d40ee6f1db36 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Thu, 7 Apr 2022 16:20:40 +1000 Subject: [PATCH] SP fixes from Wycheproof tests EC point add is a double if affine ordinates are the same. Fix AVX2 implementation of mod inv to do conversion from 26 bits in 32-bit registers to 64 bits in 64-bit registers. Also handle negative value. RSA OAEP: loop down to last byte. Must have a separator. Div for x86_64: fix to have all 1s when high word equals divisor. --- wolfcrypt/src/rsa.c | 2 +- wolfcrypt/src/sp_arm32.c | 368 ++++++++++-------- wolfcrypt/src/sp_arm64.c | 358 +++++++++-------- wolfcrypt/src/sp_armthumb.c | 368 ++++++++++-------- wolfcrypt/src/sp_c32.c | 382 ++++++++++--------- wolfcrypt/src/sp_c64.c | 370 ++++++++++-------- wolfcrypt/src/sp_cortexm.c | 368 ++++++++++-------- wolfcrypt/src/sp_x86_64.c | 657 ++++++++++++++++++-------------- wolfcrypt/src/sp_x86_64_asm.S | 118 ++++-- wolfcrypt/src/sp_x86_64_asm.asm | 118 ++++-- 10 files changed, 1749 insertions(+), 1360 deletions(-) diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c index e86dd1f17..320cb8030 100644 --- a/wolfcrypt/src/rsa.c +++ b/wolfcrypt/src/rsa.c @@ -1561,7 +1561,7 @@ static int RsaUnPad_OAEP(byte *pkcsBlock, unsigned int pkcsBlockLen, /* advance idx to index of PS and msg separator, account for PS size of 0*/ idx = hLen + 1 + hLen; - while (idx < pkcsBlockLen && pkcsBlock[idx] == 0) {idx++;} + while (idx < pkcsBlockLen-1 && pkcsBlock[idx] == 0) {idx++;} /* create hash of label for comparison with hash sent */ if ((ret = wc_Hash(hType, optLabel, labelLen, h, hLen)) != 0) { diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index d32d7da36..25f60ec6e 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -31780,6 +31780,17 @@ static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b) (a[6] ^ b[6]) | (a[7] ^ b[7])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. 
+ * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_256_iszero_8(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -32020,6 +32031,10 @@ static void sp_256_proj_point_add_8(sp_point_256* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod); sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod); @@ -32036,37 +32051,42 @@ static void sp_256_proj_point_add_8(sp_point_256* r, sp_256_mont_sub_8(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_8(t4, t4, t3, p256_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_8(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(x, x, t5, p256_mod); - sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_dbl_8(t3, y, p256_mod); - sp_256_mont_sub_8(x, x, t3, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_8(y, y, x, p256_mod); - sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(y, y, t5, p256_mod); + if (sp_256_iszero_8(t2) & sp_256_iszero_8(t4) & maskt) { + sp_256_proj_point_dbl_8(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); + 
sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(x, x, t5, p256_mod); + sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(t3, y, p256_mod); + sp_256_mont_sub_8(x, x, t3, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_8(y, y, x, p256_mod); + sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(y, y, t5, p256_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 8; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 8; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 8; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 8; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -35023,17 +35043,6 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_256_iszero_8(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. 
(a = a + 1) * @@ -40806,6 +40815,18 @@ static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b) (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_384_iszero_12(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -41046,6 +41067,10 @@ static void sp_384_proj_point_add_12(sp_point_384* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod); sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod); @@ -41062,37 +41087,42 @@ static void sp_384_proj_point_add_12(sp_point_384* r, sp_384_mont_sub_12(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_12(t4, t4, t3, p384_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_12(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(x, x, t5, p384_mod); - sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_12(t3, y, p384_mod); - sp_384_mont_sub_12(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_12(y, y, x, p384_mod); - sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(y, y, t5, p384_mod); + if (sp_384_iszero_12(t2) & sp_384_iszero_12(t4) & maskt) { + sp_384_proj_point_dbl_12(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + 
sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_12(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(x, x, t5, p384_mod); + sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(t3, y, p384_mod); + sp_384_mont_sub_12(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_12(y, y, x, p384_mod); + sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(y, y, t5, p384_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 12; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 12; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 12; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 12; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -44105,18 +44135,6 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. 
- */ -static int sp_384_iszero_12(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * @@ -52217,6 +52235,19 @@ static int sp_521_cmp_equal_17(const sp_digit* a, const sp_digit* b) (a[15] ^ b[15]) | (a[16] ^ b[16])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_521_iszero_17(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | + a[16]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -52457,6 +52488,10 @@ static void sp_521_proj_point_add_17(sp_point_521* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_521_mont_sqr_17(t1, q->z, p521_mod, p521_mp_mod); sp_521_mont_mul_17(t3, t1, q->z, p521_mod, p521_mp_mod); @@ -52473,37 +52508,42 @@ static void sp_521_proj_point_add_17(sp_point_521* r, sp_521_mont_sub_17(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_17(t4, t4, t3, p521_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_17(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_17(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_17(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_17(x, x, t5, p521_mod); - sp_521_mont_mul_17(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_17(t3, y, p521_mod); - sp_521_mont_sub_17(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - 
sp_521_mont_sub_lower_17(y, y, x, p521_mod); - sp_521_mont_mul_17(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_17(y, y, t5, p521_mod); + if (sp_521_iszero_17(t2) & sp_521_iszero_17(t4) & maskt) { + sp_521_proj_point_dbl_17(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_17(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_17(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_17(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(x, x, t5, p521_mod); + sp_521_mont_mul_17(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_17(t3, y, p521_mod); + sp_521_mont_sub_17(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_17(y, y, x, p521_mod); + sp_521_mont_mul_17(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(y, y, t5, p521_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 17; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 17; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 17; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 17; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -56134,19 +56174,6 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, 
#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_521_iszero_17(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | - a[16]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * @@ -65989,6 +66016,20 @@ static int sp_1024_cmp_equal_32(const sp_digit* a, const sp_digit* b) (a[30] ^ b[30]) | (a[31] ^ b[31])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_1024_iszero_32(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | + a[16] | a[17] | a[18] | a[19] | a[20] | a[21] | a[22] | a[23] | + a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. 
@@ -66229,6 +66270,10 @@ static void sp_1024_proj_point_add_32(sp_point_1024* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_32(t1, q->z, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_32(t3, t1, q->z, p1024_mod, p1024_mp_mod); @@ -66245,37 +66290,42 @@ static void sp_1024_proj_point_add_32(sp_point_1024* r, sp_1024_mont_sub_32(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_32(t4, t4, t3, p1024_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_32(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_32(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(x, x, t5, p1024_mod); - sp_1024_mont_mul_32(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_32(t3, y, p1024_mod); - sp_1024_mont_sub_32(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); - sp_1024_mont_mul_32(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(y, y, t5, p1024_mod); + if (sp_1024_iszero_32(t2) & sp_1024_iszero_32(t4) & maskt) { + sp_1024_proj_point_dbl_32(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_32(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_32(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(x, x, t5, p1024_mod); + sp_1024_mont_mul_32(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_32(t3, y, p1024_mod); 
+ sp_1024_mont_sub_32(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); + sp_1024_mont_mul_32(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(y, y, t5, p1024_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 32; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 32; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 32; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 32; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -74650,20 +74700,6 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, } #endif /* WOLFSSL_SP_SMALL */ -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_1024_iszero_32(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | - a[16] | a[17] | a[18] | a[19] | a[20] | a[21] | a[22] | a[23] | - a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31]) == 0; -} - #ifdef HAVE_ECC_CHECK_KEY /* Read big endian unsigned byte array into r. 
* diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c index afb365d92..49eb1f630 100644 --- a/wolfcrypt/src/sp_arm64.c +++ b/wolfcrypt/src/sp_arm64.c @@ -23660,6 +23660,17 @@ static int sp_256_cmp_equal_4(const sp_digit* a, const sp_digit* b) (a[3] ^ b[3])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_256_iszero_4(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -23900,6 +23911,10 @@ static void sp_256_proj_point_add_4(sp_point_256* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod); sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod); @@ -23916,36 +23931,41 @@ static void sp_256_proj_point_add_4(sp_point_256* r, sp_256_mont_sub_4(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_4(t4, t4, t3, p256_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_4(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_4(x, x, t5, p256_mod); - sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_sub_dbl_4(x, x, y, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_4(y, y, x, p256_mod); - sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_4(y, y, t5, p256_mod); + if (sp_256_iszero_4(t2) & sp_256_iszero_4(t4) & maskt) { + sp_256_proj_point_dbl_4(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + 
sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_4(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(x, x, t5, p256_mod); + sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_sub_dbl_4(x, x, y, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_4(y, y, x, p256_mod); + sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(y, y, t5, p256_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 4; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 4; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 4; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 4; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 4; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 4; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -39366,17 +39386,6 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. 
- */ -static int sp_256_iszero_4(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * @@ -43673,6 +43682,17 @@ static int sp_384_cmp_equal_6(const sp_digit* a, const sp_digit* b) (a[3] ^ b[3]) | (a[4] ^ b[4]) | (a[5] ^ b[5])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_384_iszero_6(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -43913,6 +43933,10 @@ static void sp_384_proj_point_add_6(sp_point_384* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod); sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod); @@ -43929,37 +43953,42 @@ static void sp_384_proj_point_add_6(sp_point_384* r, sp_384_mont_sub_6(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_6(t4, t4, t3, p384_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_6(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_6(x, x, t5, p384_mod); - sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_6(t3, y, p384_mod); - sp_384_mont_sub_6(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_6(y, y, x, p384_mod); - sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_6(y, y, t5, p384_mod); + if (sp_384_iszero_6(t2) & 
sp_384_iszero_6(t4) & maskt) { + sp_384_proj_point_dbl_6(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_6(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(x, x, t5, p384_mod); + sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(t3, y, p384_mod); + sp_384_mont_sub_6(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_6(y, y, x, p384_mod); + sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(y, y, t5, p384_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 6; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 6; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 6; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 6; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -65234,17 +65263,6 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. 
- * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_384_iszero_6(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * @@ -71773,6 +71791,18 @@ static int sp_521_cmp_equal_9(const sp_digit* a, const sp_digit* b) (a[6] ^ b[6]) | (a[7] ^ b[7]) | (a[8] ^ b[8])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_521_iszero_9(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -72013,6 +72043,10 @@ static void sp_521_proj_point_add_9(sp_point_521* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_521_mont_sqr_9(t1, q->z, p521_mod, p521_mp_mod); sp_521_mont_mul_9(t3, t1, q->z, p521_mod, p521_mp_mod); @@ -72029,37 +72063,42 @@ static void sp_521_proj_point_add_9(sp_point_521* r, sp_521_mont_sub_9(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_9(t4, t4, t3, p521_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_9(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_9(x, x, t5, p521_mod); - sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_9(t3, y, p521_mod); - sp_521_mont_sub_9(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_9(y, y, x, p521_mod); - 
sp_521_mont_mul_9(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_9(y, y, t5, p521_mod); + if (sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { + sp_521_proj_point_dbl_9(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(x, x, t5, p521_mod); + sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_9(t3, y, p521_mod); + sp_521_mont_sub_9(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_9(y, y, x, p521_mod); + sp_521_mont_mul_9(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(y, y, t5, p521_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -110084,18 +110123,6 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || 
\ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_521_iszero_9(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * @@ -115667,6 +115694,18 @@ static int sp_1024_cmp_equal_16(const sp_digit* a, const sp_digit* b) (a[15] ^ b[15])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_1024_iszero_16(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -115907,6 +115946,10 @@ static void sp_1024_proj_point_add_16(sp_point_1024* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_16(t1, q->z, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_16(t3, t1, q->z, p1024_mod, p1024_mp_mod); @@ -115923,37 +115966,42 @@ static void sp_1024_proj_point_add_16(sp_point_1024* r, sp_1024_mont_sub_16(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_16(t4, t4, t3, p1024_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_16(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_16(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_16(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_16(x, x, t5, p1024_mod); - 
sp_1024_mont_mul_16(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_16(t3, y, p1024_mod); - sp_1024_mont_sub_16(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_16(y, y, x, p1024_mod); - sp_1024_mont_mul_16(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_16(y, y, t5, p1024_mod); + if (sp_1024_iszero_16(t2) & sp_1024_iszero_16(t4) & maskt) { + sp_1024_proj_point_dbl_16(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_16(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_16(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_16(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(x, x, t5, p1024_mod); + sp_1024_mont_mul_16(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_16(t3, y, p1024_mod); + sp_1024_mont_sub_16(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_16(y, y, x, p1024_mod); + sp_1024_mont_mul_16(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(y, y, t5, p1024_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 16; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 16; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 16; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 16; i++) { - r->z[i] = (p->z[i] & 
maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -123636,18 +123684,6 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, } #endif /* WOLFSSL_SP_SMALL */ -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_1024_iszero_16(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15]) == 0; -} - #ifdef HAVE_ECC_CHECK_KEY /* Read big endian unsigned byte array into r. * diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c index 01e286436..0a038d321 100644 --- a/wolfcrypt/src/sp_armthumb.c +++ b/wolfcrypt/src/sp_armthumb.c @@ -100020,6 +100020,17 @@ static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b) (a[6] ^ b[6]) | (a[7] ^ b[7])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_256_iszero_8(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. 
@@ -100260,6 +100271,10 @@ static void sp_256_proj_point_add_8(sp_point_256* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod); sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod); @@ -100276,37 +100291,42 @@ static void sp_256_proj_point_add_8(sp_point_256* r, sp_256_mont_sub_8(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_8(t4, t4, t3, p256_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_8(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(x, x, t5, p256_mod); - sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_dbl_8(t3, y, p256_mod); - sp_256_mont_sub_8(x, x, t3, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_8(y, y, x, p256_mod); - sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(y, y, t5, p256_mod); + if (sp_256_iszero_8(t2) & sp_256_iszero_8(t4) & maskt) { + sp_256_proj_point_dbl_8(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(x, x, t5, p256_mod); + sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(t3, y, p256_mod); + sp_256_mont_sub_8(x, x, t3, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + 
sp_256_mont_sub_lower_8(y, y, x, p256_mod); + sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(y, y, t5, p256_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 8; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 8; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 8; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 8; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -103263,17 +103283,6 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_256_iszero_8(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * @@ -110391,6 +110400,18 @@ static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b) (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. 
+ */ +static int sp_384_iszero_12(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -110631,6 +110652,10 @@ static void sp_384_proj_point_add_12(sp_point_384* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod); sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod); @@ -110647,37 +110672,42 @@ static void sp_384_proj_point_add_12(sp_point_384* r, sp_384_mont_sub_12(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_12(t4, t4, t3, p384_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_12(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(x, x, t5, p384_mod); - sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_12(t3, y, p384_mod); - sp_384_mont_sub_12(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_12(y, y, x, p384_mod); - sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(y, y, t5, p384_mod); + if (sp_384_iszero_12(t2) & sp_384_iszero_12(t4) & maskt) { + sp_384_proj_point_dbl_12(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_12(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(x, 
t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(x, x, t5, p384_mod); + sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(t3, y, p384_mod); + sp_384_mont_sub_12(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_12(y, y, x, p384_mod); + sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(y, y, t5, p384_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 12; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 12; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 12; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 12; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -113690,18 +113720,6 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_384_iszero_12(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. 
(a = a + 1) * @@ -123925,6 +123943,19 @@ static int sp_521_cmp_equal_17(const sp_digit* a, const sp_digit* b) (a[15] ^ b[15]) | (a[16] ^ b[16])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_521_iszero_17(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | + a[16]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -124165,6 +124196,10 @@ static void sp_521_proj_point_add_17(sp_point_521* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_521_mont_sqr_17(t1, q->z, p521_mod, p521_mp_mod); sp_521_mont_mul_17(t3, t1, q->z, p521_mod, p521_mp_mod); @@ -124181,37 +124216,42 @@ static void sp_521_proj_point_add_17(sp_point_521* r, sp_521_mont_sub_17(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_17(t4, t4, t3, p521_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_17(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_17(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_17(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_17(x, x, t5, p521_mod); - sp_521_mont_mul_17(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_17(t3, y, p521_mod); - sp_521_mont_sub_17(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_17(y, y, x, p521_mod); - sp_521_mont_mul_17(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_17(y, y, t5, p521_mod); + if (sp_521_iszero_17(t2) & sp_521_iszero_17(t4) & maskt) { + sp_521_proj_point_dbl_17(r, p, t); + } + else { + /* X3 = 
R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_17(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_17(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_17(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(x, x, t5, p521_mod); + sp_521_mont_mul_17(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_17(t3, y, p521_mod); + sp_521_mont_sub_17(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_17(y, y, x, p521_mod); + sp_521_mont_mul_17(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(y, y, t5, p521_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 17; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 17; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 17; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 17; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -127842,19 +127882,6 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. 
- */ -static int sp_521_iszero_17(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | - a[16]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * @@ -208520,6 +208547,20 @@ static int sp_1024_cmp_equal_32(const sp_digit* a, const sp_digit* b) (a[30] ^ b[30]) | (a[31] ^ b[31])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_1024_iszero_32(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | + a[16] | a[17] | a[18] | a[19] | a[20] | a[21] | a[22] | a[23] | + a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. 
@@ -208760,6 +208801,10 @@ static void sp_1024_proj_point_add_32(sp_point_1024* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_32(t1, q->z, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_32(t3, t1, q->z, p1024_mod, p1024_mp_mod); @@ -208776,37 +208821,42 @@ static void sp_1024_proj_point_add_32(sp_point_1024* r, sp_1024_mont_sub_32(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_32(t4, t4, t3, p1024_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_32(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_32(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(x, x, t5, p1024_mod); - sp_1024_mont_mul_32(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_32(t3, y, p1024_mod); - sp_1024_mont_sub_32(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); - sp_1024_mont_mul_32(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(y, y, t5, p1024_mod); + if (sp_1024_iszero_32(t2) & sp_1024_iszero_32(t4) & maskt) { + sp_1024_proj_point_dbl_32(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_32(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_32(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(x, x, t5, p1024_mod); + sp_1024_mont_mul_32(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_32(t3, y, 
p1024_mod); + sp_1024_mont_sub_32(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); + sp_1024_mont_mul_32(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(y, y, t5, p1024_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 32; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 32; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 32; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 32; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -217181,20 +217231,6 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, } #endif /* WOLFSSL_SP_SMALL */ -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_1024_iszero_32(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | - a[16] | a[17] | a[18] | a[19] | a[20] | a[21] | a[22] | a[23] | - a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31]) == 0; -} - #ifdef HAVE_ECC_CHECK_KEY /* Read big endian unsigned byte array into r. 
* diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index c97aa7306..2b7e71f43 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -57,10 +57,10 @@ do { \ int ii; \ byte nb[(bits + 7) / 8]; \ - sp_digit s[words]; \ - XMEMCPY(s, var, sizeof(s)); \ - sp_##total##_norm_##words(s); \ - sp_##total##_to_bin_##words(s, nb); \ + sp_digit _s[words]; \ + XMEMCPY(_s, var, sizeof(_s)); \ + sp_##total##_norm_##words(_s); \ + sp_##total##_to_bin_##words(_s, nb); \ fprintf(stderr, name "=0x"); \ for (ii=0; ii<(bits + 7) / 8; ii++) \ fprintf(stderr, "%02x", nb[ii]); \ @@ -21807,6 +21807,18 @@ static int sp_256_cmp_equal_9(const sp_digit* a, const sp_digit* b) (a[6] ^ b[6]) | (a[7] ^ b[7]) | (a[8] ^ b[8])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_256_iszero_9(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. 
@@ -22047,6 +22059,10 @@ static void sp_256_proj_point_add_9(sp_point_256* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_256_mont_sqr_9(t1, q->z, p256_mod, p256_mp_mod); sp_256_mont_mul_9(t3, t1, q->z, p256_mod, p256_mp_mod); @@ -22063,37 +22079,42 @@ static void sp_256_proj_point_add_9(sp_point_256* r, sp_256_mont_sub_9(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_9(t4, t4, t3, p256_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_9(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_9(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_9(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_9(x, x, t5, p256_mod); - sp_256_mont_mul_9(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_dbl_9(t3, y, p256_mod); - sp_256_mont_sub_9(x, x, t3, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_9(y, y, x, p256_mod); - sp_256_mont_mul_9(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_9(y, y, t5, p256_mod); + if (sp_256_iszero_9(t2) & sp_256_iszero_9(t4) & maskt) { + sp_256_proj_point_dbl_9(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_9(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_9(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_9(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_9(x, x, t5, p256_mod); + sp_256_mont_mul_9(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_dbl_9(t3, y, p256_mod); + sp_256_mont_sub_9(x, x, t3, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + 
sp_256_mont_sub_lower_9(y, y, x, p256_mod); + sp_256_mont_mul_9(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_9(y, y, t5, p256_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -25142,18 +25163,6 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_256_iszero_9(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * @@ -29067,6 +29076,18 @@ static int sp_384_cmp_equal_15(const sp_digit* a, const sp_digit* b) (a[12] ^ b[12]) | (a[13] ^ b[13]) | (a[14] ^ b[14])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. 
+ */ +static int sp_384_iszero_15(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -29307,6 +29328,10 @@ static void sp_384_proj_point_add_15(sp_point_384* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_384_mont_sqr_15(t1, q->z, p384_mod, p384_mp_mod); sp_384_mont_mul_15(t3, t1, q->z, p384_mod, p384_mp_mod); @@ -29323,37 +29348,42 @@ static void sp_384_proj_point_add_15(sp_point_384* r, sp_384_mont_sub_15(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_15(t4, t4, t3, p384_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_15(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_15(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_15(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_15(x, x, t5, p384_mod); - sp_384_mont_mul_15(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_15(t3, y, p384_mod); - sp_384_mont_sub_15(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_15(y, y, x, p384_mod); - sp_384_mont_mul_15(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_15(y, y, t5, p384_mod); + if (sp_384_iszero_15(t2) & sp_384_iszero_15(t4) & maskt) { + sp_384_proj_point_dbl_15(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_15(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_15(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(z, z, q->z, p384_mod, p384_mp_mod); + 
sp_384_mont_sqr_15(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(x, x, t5, p384_mod); + sp_384_mont_mul_15(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_15(t3, y, p384_mod); + sp_384_mont_sub_15(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_15(y, y, x, p384_mod); + sp_384_mont_mul_15(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(y, y, t5, p384_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 15; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 15; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 15; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 15; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 15; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 15; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -33030,18 +33060,6 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_384_iszero_15(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. 
(a = a + 1) * @@ -36581,6 +36599,19 @@ static int sp_521_cmp_equal_21(const sp_digit* a, const sp_digit* b) (a[18] ^ b[18]) | (a[19] ^ b[19]) | (a[20] ^ b[20])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_521_iszero_21(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | + a[16] | a[17] | a[18] | a[19] | a[20]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -36821,6 +36852,10 @@ static void sp_521_proj_point_add_21(sp_point_521* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_521_mont_sqr_21(t1, q->z, p521_mod, p521_mp_mod); sp_521_mont_mul_21(t3, t1, q->z, p521_mod, p521_mp_mod); @@ -36837,37 +36872,42 @@ static void sp_521_proj_point_add_21(sp_point_521* r, sp_521_mont_sub_21(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_21(t4, t4, t3, p521_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_21(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_21(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_21(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_21(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_21(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_21(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_21(x, x, t5, p521_mod); - sp_521_mont_mul_21(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_21(t3, y, p521_mod); - sp_521_mont_sub_21(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_21(y, y, x, p521_mod); - sp_521_mont_mul_21(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_21(y, y, t5, p521_mod); + if (sp_521_iszero_21(t2) & sp_521_iszero_21(t4) & maskt) { + 
sp_521_proj_point_dbl_21(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_21(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_21(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_21(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(x, x, t5, p521_mod); + sp_521_mont_mul_21(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_21(t3, y, p521_mod); + sp_521_mont_sub_21(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_21(y, y, x, p521_mod); + sp_521_mont_mul_21(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(y, y, t5, p521_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 21; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 21; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 21; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 21; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 21; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 21; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -40976,19 +41016,6 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. 
- * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_521_iszero_21(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | - a[16] | a[17] | a[18] | a[19] | a[20]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * @@ -45164,6 +45191,22 @@ static int sp_1024_cmp_equal_42(const sp_digit* a, const sp_digit* b) (a[39] ^ b[39]) | (a[40] ^ b[40]) | (a[41] ^ b[41])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_1024_iszero_42(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | + a[16] | a[17] | a[18] | a[19] | a[20] | a[21] | a[22] | a[23] | + a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31] | + a[32] | a[33] | a[34] | a[35] | a[36] | a[37] | a[38] | a[39] | + a[40] | a[41]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. 
@@ -45404,6 +45447,10 @@ static void sp_1024_proj_point_add_42(sp_point_1024* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_42(t1, q->z, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_42(t3, t1, q->z, p1024_mod, p1024_mp_mod); @@ -45420,37 +45467,42 @@ static void sp_1024_proj_point_add_42(sp_point_1024* r, sp_1024_mont_sub_42(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_42(t4, t4, t3, p1024_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_42(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_42(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_42(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_42(x, x, t5, p1024_mod); - sp_1024_mont_mul_42(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_42(t3, y, p1024_mod); - sp_1024_mont_sub_42(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_42(y, y, x, p1024_mod); - sp_1024_mont_mul_42(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_42(y, y, t5, p1024_mod); + if (sp_1024_iszero_42(t2) & sp_1024_iszero_42(t4) & maskt) { + sp_1024_proj_point_dbl_42(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_42(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_42(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_42(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_42(x, x, t5, p1024_mod); + sp_1024_mont_mul_42(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_42(t3, y, p1024_mod); 
+ sp_1024_mont_sub_42(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_42(y, y, x, p1024_mod); + sp_1024_mont_mul_42(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_42(y, y, t5, p1024_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 42; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 42; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 42; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 42; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 42; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 42; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -54360,22 +54412,6 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, } #endif /* WOLFSSL_SP_SMALL */ -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_1024_iszero_42(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | - a[16] | a[17] | a[18] | a[19] | a[20] | a[21] | a[22] | a[23] | - a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31] | - a[32] | a[33] | a[34] | a[35] | a[36] | a[37] | a[38] | a[39] | - a[40] | a[41]) == 0; -} - #ifdef HAVE_ECC_CHECK_KEY /* Read big endian unsigned byte array into r. 
* diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c index 766193d4f..b167a28ad 100644 --- a/wolfcrypt/src/sp_c64.c +++ b/wolfcrypt/src/sp_c64.c @@ -57,10 +57,10 @@ do { \ int ii; \ byte nb[(bits + 7) / 8]; \ - sp_digit s[words]; \ - XMEMCPY(s, var, sizeof(s)); \ - sp_##total##_norm_##words(s); \ - sp_##total##_to_bin_##words(s, nb); \ + sp_digit _s[words]; \ + XMEMCPY(_s, var, sizeof(_s)); \ + sp_##total##_norm_##words(_s); \ + sp_##total##_to_bin_##words(_s, nb); \ fprintf(stderr, name "=0x"); \ for (ii=0; ii<(bits + 7) / 8; ii++) \ fprintf(stderr, "%02x", nb[ii]); \ @@ -23163,6 +23163,17 @@ static int sp_256_cmp_equal_5(const sp_digit* a, const sp_digit* b) (a[3] ^ b[3]) | (a[4] ^ b[4])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_256_iszero_5(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. 
@@ -23403,6 +23414,10 @@ static void sp_256_proj_point_add_5(sp_point_256* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_256_mont_sqr_5(t1, q->z, p256_mod, p256_mp_mod); sp_256_mont_mul_5(t3, t1, q->z, p256_mod, p256_mp_mod); @@ -23419,37 +23434,42 @@ static void sp_256_proj_point_add_5(sp_point_256* r, sp_256_mont_sub_5(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_5(t4, t4, t3, p256_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_5(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_5(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_5(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_5(x, x, t5, p256_mod); - sp_256_mont_mul_5(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_dbl_5(t3, y, p256_mod); - sp_256_mont_sub_5(x, x, t3, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_5(y, y, x, p256_mod); - sp_256_mont_mul_5(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_5(y, y, t5, p256_mod); + if (sp_256_iszero_5(t2) & sp_256_iszero_5(t4) & maskt) { + sp_256_proj_point_dbl_5(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_5(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_5(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_5(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(x, x, t5, p256_mod); + sp_256_mont_mul_5(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_dbl_5(t3, y, p256_mod); + sp_256_mont_sub_5(x, x, t3, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + 
sp_256_mont_sub_lower_5(y, y, x, p256_mod); + sp_256_mont_mul_5(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(y, y, t5, p256_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 5; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 5; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 5; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 5; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 5; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 5; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -26435,17 +26455,6 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_256_iszero_5(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * @@ -29918,6 +29927,17 @@ static int sp_384_cmp_equal_7(const sp_digit* a, const sp_digit* b) (a[6] ^ b[6])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. 
+ */ +static int sp_384_iszero_7(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -30158,6 +30178,10 @@ static void sp_384_proj_point_add_7(sp_point_384* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_384_mont_sqr_7(t1, q->z, p384_mod, p384_mp_mod); sp_384_mont_mul_7(t3, t1, q->z, p384_mod, p384_mp_mod); @@ -30174,37 +30198,42 @@ static void sp_384_proj_point_add_7(sp_point_384* r, sp_384_mont_sub_7(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_7(t4, t4, t3, p384_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_7(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_7(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_7(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_7(x, x, t5, p384_mod); - sp_384_mont_mul_7(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_7(t3, y, p384_mod); - sp_384_mont_sub_7(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_7(y, y, x, p384_mod); - sp_384_mont_mul_7(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_7(y, y, t5, p384_mod); + if (sp_384_iszero_7(t2) & sp_384_iszero_7(t4) & maskt) { + sp_384_proj_point_dbl_7(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_7(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_7(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_7(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(x, x, t5, p384_mod); + 
sp_384_mont_mul_7(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_7(t3, y, p384_mod); + sp_384_mont_sub_7(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_7(y, y, x, p384_mod); + sp_384_mont_mul_7(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(y, y, t5, p384_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 7; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 7; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 7; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 7; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 7; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 7; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -33756,17 +33785,6 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_384_iszero_7(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * @@ -37283,6 +37301,18 @@ static int sp_521_cmp_equal_9(const sp_digit* a, const sp_digit* b) (a[6] ^ b[6]) | (a[7] ^ b[7]) | (a[8] ^ b[8])) == 0; } +/* Returns 1 if the number of zero. 
+ * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_521_iszero_9(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -37523,6 +37553,10 @@ static void sp_521_proj_point_add_9(sp_point_521* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_521_mont_sqr_9(t1, q->z, p521_mod, p521_mp_mod); sp_521_mont_mul_9(t3, t1, q->z, p521_mod, p521_mp_mod); @@ -37539,37 +37573,42 @@ static void sp_521_proj_point_add_9(sp_point_521* r, sp_521_mont_sub_9(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_9(t4, t4, t3, p521_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_9(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_9(x, x, t5, p521_mod); - sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_9(t3, y, p521_mod); - sp_521_mont_sub_9(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_9(y, y, x, p521_mod); - sp_521_mont_mul_9(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_9(y, y, t5, p521_mod); + if (sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { + sp_521_proj_point_dbl_9(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); + 
sp_521_mont_mul_9(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(x, x, t5, p521_mod); + sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_9(t3, y, p521_mod); + sp_521_mont_sub_9(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_9(y, y, x, p521_mod); + sp_521_mont_mul_9(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(y, y, t5, p521_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -41022,18 +41061,6 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_521_iszero_9(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. 
(a = a + 1) * @@ -45090,6 +45117,19 @@ static int sp_1024_cmp_equal_18(const sp_digit* a, const sp_digit* b) (a[15] ^ b[15]) | (a[16] ^ b[16]) | (a[17] ^ b[17])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_1024_iszero_18(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | + a[16] | a[17]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -45330,6 +45370,10 @@ static void sp_1024_proj_point_add_18(sp_point_1024* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_18(t1, q->z, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_18(t3, t1, q->z, p1024_mod, p1024_mp_mod); @@ -45346,37 +45390,42 @@ static void sp_1024_proj_point_add_18(sp_point_1024* r, sp_1024_mont_sub_18(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_18(t4, t4, t3, p1024_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_18(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_18(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_18(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_18(x, x, t5, p1024_mod); - sp_1024_mont_mul_18(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_18(t3, y, p1024_mod); - sp_1024_mont_sub_18(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_18(y, y, x, p1024_mod); - sp_1024_mont_mul_18(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_18(y, y, t5, p1024_mod); + if (sp_1024_iszero_18(t2) & sp_1024_iszero_18(t4) & 
maskt) { + sp_1024_proj_point_dbl_18(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_18(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_18(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_18(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_18(x, x, t5, p1024_mod); + sp_1024_mont_mul_18(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_18(t3, y, p1024_mod); + sp_1024_mont_sub_18(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_18(y, y, x, p1024_mod); + sp_1024_mont_mul_18(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_18(y, y, t5, p1024_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 18; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 18; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 18; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 18; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 18; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 18; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -53468,19 +53517,6 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, } #endif /* WOLFSSL_SP_SMALL */ -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. 
- * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_1024_iszero_18(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | - a[16] | a[17]) == 0; -} - #ifdef HAVE_ECC_CHECK_KEY /* Read big endian unsigned byte array into r. * diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index f8ca2ff9a..17b3f7fca 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -19336,6 +19336,17 @@ static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b) (a[6] ^ b[6]) | (a[7] ^ b[7])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_256_iszero_8(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -19576,6 +19587,10 @@ static void sp_256_proj_point_add_8(sp_point_256* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod); sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod); @@ -19592,37 +19607,42 @@ static void sp_256_proj_point_add_8(sp_point_256* r, sp_256_mont_sub_8(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_8(t4, t4, t3, p256_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_8(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(x, x, t5, p256_mod); - sp_256_mont_mul_8(t5, t5, t3, p256_mod, 
p256_mp_mod); - sp_256_mont_dbl_8(t3, y, p256_mod); - sp_256_mont_sub_8(x, x, t3, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_8(y, y, x, p256_mod); - sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(y, y, t5, p256_mod); + if (sp_256_iszero_8(t2) & sp_256_iszero_8(t4) & maskt) { + sp_256_proj_point_dbl_8(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(x, x, t5, p256_mod); + sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(t3, y, p256_mod); + sp_256_mont_sub_8(x, x, t3, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_8(y, y, x, p256_mod); + sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(y, y, t5, p256_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 8; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 8; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 8; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 8; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & 
q->infinity; } } @@ -22579,17 +22599,6 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_256_iszero_8(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * @@ -26431,6 +26440,18 @@ static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b) (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_384_iszero_12(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. 
@@ -26671,6 +26692,10 @@ static void sp_384_proj_point_add_12(sp_point_384* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod); sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod); @@ -26687,37 +26712,42 @@ static void sp_384_proj_point_add_12(sp_point_384* r, sp_384_mont_sub_12(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_12(t4, t4, t3, p384_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_12(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(x, x, t5, p384_mod); - sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_12(t3, y, p384_mod); - sp_384_mont_sub_12(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_12(y, y, x, p384_mod); - sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(y, y, t5, p384_mod); + if (sp_384_iszero_12(t2) & sp_384_iszero_12(t4) & maskt) { + sp_384_proj_point_dbl_12(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_12(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(x, x, t5, p384_mod); + sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(t3, y, p384_mod); + sp_384_mont_sub_12(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - 
S1*H^3 */ + sp_384_mont_sub_lower_12(y, y, x, p384_mod); + sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(y, y, t5, p384_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 12; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 12; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 12; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 12; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -29730,18 +29760,6 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_384_iszero_12(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. (a = a + 1) * @@ -34114,6 +34132,19 @@ static int sp_521_cmp_equal_17(const sp_digit* a, const sp_digit* b) (a[15] ^ b[15]) | (a[16] ^ b[16])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. 
+ */ +static int sp_521_iszero_17(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | + a[16]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -34354,6 +34385,10 @@ static void sp_521_proj_point_add_17(sp_point_521* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_521_mont_sqr_17(t1, q->z, p521_mod, p521_mp_mod); sp_521_mont_mul_17(t3, t1, q->z, p521_mod, p521_mp_mod); @@ -34370,37 +34405,42 @@ static void sp_521_proj_point_add_17(sp_point_521* r, sp_521_mont_sub_17(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_17(t4, t4, t3, p521_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_17(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_17(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_17(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_17(x, x, t5, p521_mod); - sp_521_mont_mul_17(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_17(t3, y, p521_mod); - sp_521_mont_sub_17(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_17(y, y, x, p521_mod); - sp_521_mont_mul_17(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_17(y, y, t5, p521_mod); + if (sp_521_iszero_17(t2) & sp_521_iszero_17(t4) & maskt) { + sp_521_proj_point_dbl_17(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_17(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_17(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(z, z, q->z, p521_mod, 
p521_mp_mod); + sp_521_mont_sqr_17(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(x, x, t5, p521_mod); + sp_521_mont_mul_17(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_17(t3, y, p521_mod); + sp_521_mont_sub_17(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_17(y, y, x, p521_mod); + sp_521_mont_mul_17(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(y, y, t5, p521_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 17; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 17; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 17; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 17; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -38031,19 +38071,6 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_521_iszero_17(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | - a[16]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ /* Add 1 to a. 
(a = a + 1) * @@ -44112,6 +44139,20 @@ static int sp_1024_cmp_equal_32(const sp_digit* a, const sp_digit* b) (a[30] ^ b[30]) | (a[31] ^ b[31])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_1024_iszero_32(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | + a[16] | a[17] | a[18] | a[19] | a[20] | a[21] | a[22] | a[23] | + a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -44352,6 +44393,10 @@ static void sp_1024_proj_point_add_32(sp_point_1024* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_32(t1, q->z, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_32(t3, t1, q->z, p1024_mod, p1024_mp_mod); @@ -44368,37 +44413,42 @@ static void sp_1024_proj_point_add_32(sp_point_1024* r, sp_1024_mont_sub_32(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_32(t4, t4, t3, p1024_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_32(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_32(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(x, x, t5, p1024_mod); - sp_1024_mont_mul_32(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_32(t3, y, p1024_mod); - sp_1024_mont_sub_32(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); - sp_1024_mont_mul_32(y, y, t4, p1024_mod, p1024_mp_mod); - 
sp_1024_mont_sub_32(y, y, t5, p1024_mod); + if (sp_1024_iszero_32(t2) & sp_1024_iszero_32(t4) & maskt) { + sp_1024_proj_point_dbl_32(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_32(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_32(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(x, x, t5, p1024_mod); + sp_1024_mont_mul_32(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_32(t3, y, p1024_mod); + sp_1024_mont_sub_32(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); + sp_1024_mont_mul_32(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(y, y, t5, p1024_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 32; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 32; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 32; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 32; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -52773,20 +52823,6 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, } #endif /* WOLFSSL_SP_SMALL */ -/* Returns 1 if the number of 
zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_1024_iszero_32(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] | - a[16] | a[17] | a[18] | a[19] | a[20] | a[21] | a[22] | a[23] | - a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31]) == 0; -} - #ifdef HAVE_ECC_CHECK_KEY /* Read big endian unsigned byte array into r. * diff --git a/wolfcrypt/src/sp_x86_64.c b/wolfcrypt/src/sp_x86_64.c index 2c081a168..74c17871f 100644 --- a/wolfcrypt/src/sp_x86_64.c +++ b/wolfcrypt/src/sp_x86_64.c @@ -413,8 +413,10 @@ static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_dig #endif sp_2048_cond_sub_16(&t1[16], &t1[16], d, (sp_digit)0 - r1); for (i = 15; i >= 0; i--) { - sp_digit hi = t1[16 + i] - (t1[16 + i] == div); + sp_digit mask = 0 - (t1[16 + i] == div); + sp_digit hi = t1[16 + i] + mask; r1 = div_2048_word_16(hi, t1[16 + i - 1], div); + r1 |= mask; #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) @@ -1134,8 +1136,10 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig #endif sp_2048_cond_sub_32(&t1[32], &t1[32], d, (sp_digit)0 - r1); for (i = 31; i >= 0; i--) { - sp_digit hi = t1[32 + i] - (t1[32 + i] == div); + sp_digit mask = 0 - (t1[32 + i] == div); + sp_digit hi = t1[32 + i] + mask; r1 = div_2048_word_32(hi, t1[32 + i - 1], div); + r1 |= mask; #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) @@ -3129,8 +3133,10 @@ static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_dig #endif sp_3072_cond_sub_24(&t1[24], &t1[24], d, (sp_digit)0 - r1); for (i = 23; i >= 0; i--) { - sp_digit hi = t1[24 + i] - (t1[24 + i] == div); + sp_digit mask = 0 - (t1[24 + i] == div); + sp_digit hi = t1[24 + i] + mask; r1 = div_3072_word_24(hi, t1[24 + i 
- 1], div); + r1 |= mask; #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) @@ -3850,8 +3856,10 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig #endif sp_3072_cond_sub_48(&t1[48], &t1[48], d, (sp_digit)0 - r1); for (i = 47; i >= 0; i--) { - sp_digit hi = t1[48 + i] - (t1[48 + i] == div); + sp_digit mask = 0 - (t1[48 + i] == div); + sp_digit hi = t1[48 + i] + mask; r1 = div_3072_word_48(hi, t1[48 + i - 1], div); + r1 |= mask; #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) @@ -5810,8 +5818,10 @@ static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_dig #endif sp_4096_cond_sub_64(&t1[64], &t1[64], d, (sp_digit)0 - r1); for (i = 63; i >= 0; i--) { - sp_digit hi = t1[64 + i] - (t1[64 + i] == div); + sp_digit mask = 0 - (t1[64 + i] == div); + sp_digit hi = t1[64 + i] + mask; r1 = div_4096_word_64(hi, t1[64 + i - 1], div); + r1 |= mask; #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) @@ -8069,6 +8079,17 @@ static int sp_256_cmp_equal_4(const sp_digit* a, const sp_digit* b) (a[3] ^ b[3])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_256_iszero_4(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. 
@@ -8309,6 +8330,10 @@ static void sp_256_proj_point_add_4(sp_point_256* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod); sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod); @@ -8325,36 +8350,41 @@ static void sp_256_proj_point_add_4(sp_point_256* r, sp_256_mont_sub_4(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_4(t4, t4, t3, p256_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_4(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_4(x, x, t5, p256_mod); - sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_sub_dbl_4(x, x, y, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_4(y, y, x, p256_mod); - sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_4(y, y, t5, p256_mod); + if (sp_256_iszero_4(t2) & sp_256_iszero_4(t4) & maskt) { + sp_256_proj_point_dbl_4(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_4(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(x, x, t5, p256_mod); + sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_sub_dbl_4(x, x, y, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_4(y, y, x, p256_mod); + sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod); + 
sp_256_mont_sub_4(y, y, t5, p256_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 4; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 4; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 4; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 4; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 4; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 4; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -9397,6 +9427,10 @@ static void sp_256_proj_point_add_avx2_4(sp_point_256* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_256_mont_sqr_avx2_4(t1, q->z, p256_mod, p256_mp_mod); sp_256_mont_mul_avx2_4(t3, t1, q->z, p256_mod, p256_mp_mod); @@ -9413,36 +9447,41 @@ static void sp_256_proj_point_add_avx2_4(sp_point_256* r, sp_256_mont_sub_avx2_4(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_avx2_4(t4, t4, t3, p256_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_avx2_4(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_avx2_4(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_avx2_4(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_avx2_4(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_avx2_4(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_avx2_4(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_avx2_4(x, x, t5, p256_mod); - sp_256_mont_mul_avx2_4(t5, t5, t3, p256_mod, 
p256_mp_mod); - sp_256_mont_sub_dbl_avx2_4(x, x, y, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_avx2_4(y, y, x, p256_mod); - sp_256_mont_mul_avx2_4(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_avx2_4(y, y, t5, p256_mod); + if (sp_256_iszero_4(t2) & sp_256_iszero_4(t4) & maskt) { + sp_256_proj_point_dbl_avx2_4(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_avx2_4(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_avx2_4(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_avx2_4(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_avx2_4(x, x, t5, p256_mod); + sp_256_mont_mul_avx2_4(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_sub_dbl_avx2_4(x, x, y, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_avx2_4(y, y, x, p256_mod); + sp_256_mont_mul_avx2_4(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_avx2_4(y, y, t5, p256_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 4; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 4; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 4; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 4; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 4; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 4; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = 
p->infinity & q->infinity; } } @@ -23559,17 +23598,6 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_256_iszero_4(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ extern void sp_256_add_one_4(sp_digit* a); extern void sp_256_from_bin_bswap(sp_digit* r, int size, const byte* a, int n); @@ -23927,8 +23955,10 @@ static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit #endif sp_256_cond_sub_4(&t1[4], &t1[4], d, (sp_digit)0 - r1); for (i = 3; i >= 0; i--) { - sp_digit hi = t1[4 + i] - (t1[4 + i] == div); + sp_digit mask = 0 - (t1[4 + i] == div); + sp_digit hi = t1[4 + i] + mask; r1 = div_256_word_4(hi, t1[4 + i - 1], div); + r1 |= mask; #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) @@ -26797,6 +26827,17 @@ static int sp_384_cmp_equal_6(const sp_digit* a, const sp_digit* b) (a[3] ^ b[3]) | (a[4] ^ b[4]) | (a[5] ^ b[5])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_384_iszero_6(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. 
@@ -27037,6 +27078,10 @@ static void sp_384_proj_point_add_6(sp_point_384* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod); sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod); @@ -27053,37 +27098,42 @@ static void sp_384_proj_point_add_6(sp_point_384* r, sp_384_mont_sub_6(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_6(t4, t4, t3, p384_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_6(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_6(x, x, t5, p384_mod); - sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_6(t3, y, p384_mod); - sp_384_mont_sub_6(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_6(y, y, x, p384_mod); - sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_6(y, y, t5, p384_mod); + if (sp_384_iszero_6(t2) & sp_384_iszero_6(t4) & maskt) { + sp_384_proj_point_dbl_6(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_6(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(x, x, t5, p384_mod); + sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(t3, y, p384_mod); + sp_384_mont_sub_6(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + 
sp_384_mont_sub_lower_6(y, y, x, p384_mod); + sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(y, y, t5, p384_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 6; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 6; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 6; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 6; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -28181,6 +28231,10 @@ static void sp_384_proj_point_add_avx2_6(sp_point_384* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_384_mont_sqr_avx2_6(t1, q->z, p384_mod, p384_mp_mod); sp_384_mont_mul_avx2_6(t3, t1, q->z, p384_mod, p384_mp_mod); @@ -28197,37 +28251,42 @@ static void sp_384_proj_point_add_avx2_6(sp_point_384* r, sp_384_mont_sub_avx2_6(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_avx2_6(t4, t4, t3, p384_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_avx2_6(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_avx2_6(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_avx2_6(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_avx2_6(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_avx2_6(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_avx2_6(x, t4, p384_mod, 
p384_mp_mod); - sp_384_mont_sub_avx2_6(x, x, t5, p384_mod); - sp_384_mont_mul_avx2_6(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_avx2_6(t3, y, p384_mod); - sp_384_mont_sub_avx2_6(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_avx2_6(y, y, x, p384_mod); - sp_384_mont_mul_avx2_6(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_avx2_6(y, y, t5, p384_mod); + if (sp_384_iszero_6(t2) & sp_384_iszero_6(t4) & maskt) { + sp_384_proj_point_dbl_avx2_6(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_avx2_6(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_avx2_6(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_avx2_6(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_avx2_6(x, x, t5, p384_mod); + sp_384_mont_mul_avx2_6(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_avx2_6(t3, y, p384_mod); + sp_384_mont_sub_avx2_6(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_avx2_6(y, y, x, p384_mod); + sp_384_mont_mul_avx2_6(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_avx2_6(y, y, t5, p384_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 6; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 6; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 6; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & 
maskq) | (y[i] & maskt); - } - for (i = 0; i < 6; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -48163,17 +48222,6 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_384_iszero_6(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ extern void sp_384_add_one_6(sp_digit* a); extern void sp_384_from_bin_bswap(sp_digit* r, int size, const byte* a, int n); @@ -48533,8 +48581,10 @@ static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit #endif sp_384_cond_sub_6(&t1[6], &t1[6], d, (sp_digit)0 - r1); for (i = 5; i >= 0; i--) { - sp_digit hi = t1[6 + i] - (t1[6 + i] == div); + sp_digit mask = 0 - (t1[6 + i] == div); + sp_digit hi = t1[6 + i] + mask; r1 = div_384_word_6(hi, t1[6 + i - 1], div); + r1 |= mask; #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) @@ -51290,6 +51340,18 @@ static int sp_521_cmp_equal_9(const sp_digit* a, const sp_digit* b) (a[6] ^ b[6]) | (a[7] ^ b[7]) | (a[8] ^ b[8])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. + */ +static int sp_521_iszero_9(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. 
@@ -51530,6 +51592,10 @@ static void sp_521_proj_point_add_9(sp_point_521* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_521_mont_sqr_9(t1, q->z, p521_mod, p521_mp_mod); sp_521_mont_mul_9(t3, t1, q->z, p521_mod, p521_mp_mod); @@ -51546,37 +51612,42 @@ static void sp_521_proj_point_add_9(sp_point_521* r, sp_521_mont_sub_9(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_9(t4, t4, t3, p521_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_9(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_9(x, x, t5, p521_mod); - sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_9(t3, y, p521_mod); - sp_521_mont_sub_9(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_9(y, y, x, p521_mod); - sp_521_mont_mul_9(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_9(y, y, t5, p521_mod); + if (sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { + sp_521_proj_point_dbl_9(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(x, x, t5, p521_mod); + sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_9(t3, y, p521_mod); + sp_521_mont_sub_9(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + 
sp_521_mont_sub_lower_9(y, y, x, p521_mod); + sp_521_mont_mul_9(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(y, y, t5, p521_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -52639,6 +52710,10 @@ static void sp_521_proj_point_add_avx2_9(sp_point_521* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_521_mont_sqr_avx2_9(t1, q->z, p521_mod, p521_mp_mod); sp_521_mont_mul_avx2_9(t3, t1, q->z, p521_mod, p521_mp_mod); @@ -52655,37 +52730,42 @@ static void sp_521_proj_point_add_avx2_9(sp_point_521* r, sp_521_mont_sub_avx2_9(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_avx2_9(t4, t4, t3, p521_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_avx2_9(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_avx2_9(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_avx2_9(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_avx2_9(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_avx2_9(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_avx2_9(x, t4, p521_mod, 
p521_mp_mod); - sp_521_mont_sub_avx2_9(x, x, t5, p521_mod); - sp_521_mont_mul_avx2_9(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_avx2_9(t3, y, p521_mod); - sp_521_mont_sub_avx2_9(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_avx2_9(y, y, x, p521_mod); - sp_521_mont_mul_avx2_9(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_avx2_9(y, y, t5, p521_mod); + if (sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { + sp_521_proj_point_dbl_avx2_9(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_avx2_9(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_avx2_9(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_avx2_9(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_avx2_9(x, x, t5, p521_mod); + sp_521_mont_mul_avx2_9(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_avx2_9(t3, y, p521_mod); + sp_521_mont_sub_avx2_9(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_avx2_9(y, y, x, p521_mod); + sp_521_mont_mul_avx2_9(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_avx2_9(y, y, t5, p521_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & 
maskq) | (y[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -88807,18 +88887,6 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \ defined(HAVE_ECC_VERIFY) -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_521_iszero_9(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8]) == 0; -} - #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */ extern void sp_521_add_one_9(sp_digit* a); extern void sp_521_from_bin_bswap(sp_digit* r, int size, const byte* a, int n); @@ -89175,6 +89243,7 @@ static WC_INLINE int sp_521_div_9(const sp_digit* a, const sp_digit* d, sp_digit ASSERT_SAVED_VECTOR_REGISTERS(); (void)m; + div = (d[8] << 55) | (d[7] >> 9); XMEMCPY(t1, a, sizeof(*t1) * 2 * 9); r1 = sp_521_cmp_9(&t1[9], d) >= 0; @@ -91320,8 +91389,10 @@ static WC_INLINE int sp_1024_div_16(const sp_digit* a, const sp_digit* d, sp_dig #endif sp_1024_cond_sub_16(&t1[16], &t1[16], d, (sp_digit)0 - r1); for (i = 15; i >= 0; i--) { - sp_digit hi = t1[16 + i] - (t1[16 + i] == div); + sp_digit mask = 0 - (t1[16 + i] == div); + sp_digit hi = t1[16 + i] + mask; r1 = div_1024_word_16(hi, t1[16 + i - 1], div); + r1 |= mask; #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) @@ -92096,6 +92167,18 @@ static int sp_1024_cmp_equal_16(const sp_digit* a, const sp_digit* b) (a[15] ^ b[15])) == 0; } +/* Returns 1 if the number of zero. + * Implementation is constant time. + * + * a Number to check. + * returns 1 if the number is zero and 0 otherwise. 
+ */ +static int sp_1024_iszero_16(const sp_digit* a) +{ + return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | + a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15]) == 0; +} + /* Add two Montgomery form projective points. * * r Result of addition. @@ -92336,6 +92419,10 @@ static void sp_1024_proj_point_add_16(sp_point_1024* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_16(t1, q->z, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_16(t3, t1, q->z, p1024_mod, p1024_mp_mod); @@ -92352,37 +92439,42 @@ static void sp_1024_proj_point_add_16(sp_point_1024* r, sp_1024_mont_sub_16(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_16(t4, t4, t3, p1024_mod); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_16(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_16(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_16(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_16(x, x, t5, p1024_mod); - sp_1024_mont_mul_16(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_16(t3, y, p1024_mod); - sp_1024_mont_sub_16(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_16(y, y, x, p1024_mod); - sp_1024_mont_mul_16(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_16(y, y, t5, p1024_mod); + if (sp_1024_iszero_16(t2) & sp_1024_iszero_16(t4) & maskt) { + sp_1024_proj_point_dbl_16(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_16(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_16(z, p->z, t2, p1024_mod, 
p1024_mp_mod); + sp_1024_mont_mul_16(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_16(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(x, x, t5, p1024_mod); + sp_1024_mont_mul_16(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_16(t3, y, p1024_mod); + sp_1024_mont_sub_16(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_16(y, y, x, p1024_mod); + sp_1024_mont_mul_16(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(y, y, t5, p1024_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 16; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + for (i = 0; i < 16; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 16; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 16; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -93418,6 +93510,10 @@ static void sp_1024_proj_point_add_avx2_16(sp_point_1024* r, sp_digit* z = t2; int i; + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + /* U1 = X1*Z2^2 */ sp_1024_mont_sqr_avx2_16(t1, q->z, p1024_mod, p1024_mp_mod); sp_1024_mont_mul_avx2_16(t3, t1, q->z, p1024_mod, p1024_mp_mod); @@ -93434,37 +93530,42 @@ static void sp_1024_proj_point_add_avx2_16(sp_point_1024* r, sp_1024_mont_sub_avx2_16(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_avx2_16(t4, t4, t3, p1024_mod); - /* X3 = 
R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_avx2_16(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_avx2_16(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_avx2_16(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_avx2_16(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_avx2_16(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_avx2_16(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_avx2_16(x, x, t5, p1024_mod); - sp_1024_mont_mul_avx2_16(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_avx2_16(t3, y, p1024_mod); - sp_1024_mont_sub_avx2_16(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_avx2_16(y, y, x, p1024_mod); - sp_1024_mont_mul_avx2_16(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_avx2_16(y, y, t5, p1024_mod); + if (sp_1024_iszero_16(t2) & sp_1024_iszero_16(t4) & maskt) { + sp_1024_proj_point_dbl_avx2_16(r, p, t); + } + else { + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_avx2_16(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_avx2_16(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_avx2_16(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_avx2_16(x, x, t5, p1024_mod); + sp_1024_mont_mul_avx2_16(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_avx2_16(t3, y, p1024_mod); + sp_1024_mont_sub_avx2_16(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_avx2_16(y, y, x, p1024_mod); + sp_1024_mont_mul_avx2_16(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_avx2_16(y, y, t5, p1024_mod); - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - for (i = 0; i < 16; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & 
maskq) | (x[i] & maskt); + for (i = 0; i < 16; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | + (x[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | + (y[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | + (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; } - for (i = 0; i < 16; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); - } - for (i = 0; i < 16; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; } } @@ -103190,18 +103291,6 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, mp_int* re return err; } -/* Returns 1 if the number of zero. - * Implementation is constant time. - * - * a Number to check. - * returns 1 if the number is zero and 0 otherwise. - */ -static int sp_1024_iszero_16(const sp_digit* a) -{ - return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] | - a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15]) == 0; -} - #ifdef HAVE_ECC_CHECK_KEY extern void sp_1024_from_bin_bswap(sp_digit* r, int size, const byte* a, int n); extern void sp_1024_from_bin_movbe(sp_digit* r, int size, const byte* a, int n); diff --git a/wolfcrypt/src/sp_x86_64_asm.S b/wolfcrypt/src/sp_x86_64_asm.S index 21c4eaf80..d71201af8 100644 --- a/wolfcrypt/src/sp_x86_64_asm.S +++ b/wolfcrypt/src/sp_x86_64_asm.S @@ -59279,29 +59279,15 @@ L_256_mod_inv_avx2_4_usubv_sub_shr1: jne L_256_mod_inv_avx2_4_uv_start orq %r9, %rsi jne L_256_mod_inv_avx2_4_uv_start - vpsrad $26, %ymm1, %ymm5 - vpsrad $26, %ymm0, %ymm4 - vpermd %ymm5, %ymm13, %ymm5 - vpand %ymm14, %ymm0, %ymm0 - vpand %ymm14, %ymm1, %ymm1 - vpaddd %ymm5, %ymm0, %ymm0 - vpaddd %ymm4, %ymm1, %ymm1 - vpsrad $26, %ymm1, %ymm5 - vpsrad $26, %ymm0, %ymm4 - vpermd %ymm5, %ymm13, %ymm5 - vpand %ymm14, 
%ymm0, %ymm0 - vpand %ymm14, %ymm1, %ymm1 - vpaddd %ymm5, %ymm0, %ymm0 - vpaddd %ymm4, %ymm1, %ymm1 vpextrd $0x00, %xmm0, %eax vpextrd $0x01, %xmm0, %r8d vpextrd $2, %xmm0, %r10d vpextrd $3, %xmm0, %r12d - vextracti128 $0x01, %ymm0, %xmm0 vpextrd $0x00, %xmm1, %ecx vpextrd $0x01, %xmm1, %r9d vpextrd $2, %xmm1, %r11d vpextrd $3, %xmm1, %r13d + vextracti128 $0x01, %ymm0, %xmm0 vextracti128 $0x01, %ymm1, %xmm1 vpextrd $0x00, %xmm0, %r14d vpextrd $0x00, %xmm1, %r15d @@ -59346,48 +59332,104 @@ L_256_mod_inv_avx2_4_vsubu_sub_shr1: jne L_256_mod_inv_avx2_4_uv_start orq %r13, %rsi jne L_256_mod_inv_avx2_4_uv_start - vpsrad $26, %ymm3, %ymm5 - vpsrad $26, %ymm2, %ymm4 - vpermd %ymm5, %ymm13, %ymm5 - vpand %ymm14, %ymm2, %ymm2 - vpand %ymm14, %ymm3, %ymm3 - vpaddd %ymm5, %ymm2, %ymm2 - vpaddd %ymm4, %ymm3, %ymm3 - vpsrad $26, %ymm3, %ymm5 - vpsrad $26, %ymm2, %ymm4 - vpermd %ymm5, %ymm13, %ymm5 - vpand %ymm14, %ymm2, %ymm2 - vpand %ymm14, %ymm3, %ymm3 - vpaddd %ymm5, %ymm2, %ymm2 - vpaddd %ymm4, %ymm3, %ymm3 vpextrd $0x00, %xmm2, %eax vpextrd $0x01, %xmm2, %r8d vpextrd $2, %xmm2, %r10d vpextrd $3, %xmm2, %r12d - vextracti128 $0x01, %ymm2, %xmm2 vpextrd $0x00, %xmm3, %ecx vpextrd $0x01, %xmm3, %r9d vpextrd $2, %xmm3, %r11d vpextrd $3, %xmm3, %r13d + vextracti128 $0x01, %ymm2, %xmm2 vextracti128 $0x01, %ymm3, %xmm3 vpextrd $0x00, %xmm2, %r14d vpextrd $0x00, %xmm3, %r15d L_256_mod_inv_avx2_4_store_done: - movslq %eax, %rax + movl %eax, %esi + andl $0x3ffffff, %eax + sarl $26, %esi + addl %esi, %ecx + movl %ecx, %esi + andl $0x3ffffff, %ecx + sarl $26, %esi + addl %esi, %r8d + movl %r8d, %esi + andl $0x3ffffff, %r8d + sarl $26, %esi + addl %esi, %r9d + movl %r9d, %esi + andl $0x3ffffff, %r9d + sarl $26, %esi + addl %esi, %r10d + movl %r10d, %esi + andl $0x3ffffff, %r10d + sarl $26, %esi + addl %esi, %r11d + movl %r11d, %esi + andl $0x3ffffff, %r11d + sarl $26, %esi + addl %esi, %r12d + movl %r12d, %esi + andl $0x3ffffff, %r12d + sarl $26, %esi + addl %esi, %r13d + movl %r13d, 
%esi + andl $0x3ffffff, %r13d + sarl $26, %esi + addl %esi, %r14d + movl %r14d, %esi + andl $0x3ffffff, %r14d + sarl $26, %esi + addl %esi, %r15d + movslq %ecx, %rcx + movslq %r9d, %r9 + movslq %r11d, %r11 + movslq %r13d, %r13 + movslq %r15d, %r15 shlq $26, %rcx + shlq $26, %r9 + shlq $26, %r11 + shlq $26, %r13 + shlq $26, %r15 + movslq %eax, %rax addq %rcx, %rax movslq %r8d, %r8 - shlq $26, %r9 - addq %r9, %r8 + adcq %r9, %r8 movslq %r10d, %r10 - shlq $26, %r11 - addq %r11, %r10 + adcq %r11, %r10 movslq %r12d, %r12 - shlq $26, %r13 - addq %r13, %r12 + adcq %r13, %r12 movslq %r14d, %r14 - shlq $26, %r15 + adcq %r15, %r14 + jge L_256_mod_inv_avx2_4_3_no_add_order + movq $0x9cac2fc632551, %rcx + movq $0xada7179e84f3b, %r9 + movq $0xfffffffbce6fa, %r11 + movq $0xfffffffff, %r13 + movq $0xffffffff0000, %r15 + addq %rcx, %rax + addq %r9, %r8 + addq %r11, %r10 + addq %r13, %r12 addq %r15, %r14 + movq $0xfffffffffffff, %rsi + movq %rax, %rcx + andq %rsi, %rax + sarq $52, %rcx + addq %rcx, %r8 + movq %r8, %r9 + andq %rsi, %r8 + sarq $52, %r9 + addq %r9, %r10 + movq %r10, %r11 + andq %rsi, %r10 + sarq $52, %r11 + addq %r11, %r12 + movq %r12, %r13 + andq %rsi, %r12 + sarq $52, %r13 + addq %r13, %r14 +L_256_mod_inv_avx2_4_3_no_add_order: movq %r8, %rcx movq %r10, %r9 movq %r12, %r11 diff --git a/wolfcrypt/src/sp_x86_64_asm.asm b/wolfcrypt/src/sp_x86_64_asm.asm index a46b83bd3..971034f56 100644 --- a/wolfcrypt/src/sp_x86_64_asm.asm +++ b/wolfcrypt/src/sp_x86_64_asm.asm @@ -57691,29 +57691,15 @@ L_256_mod_inv_avx2_4_usubv_sub_shr1: jne L_256_mod_inv_avx2_4_uv_start or rdx, r11 jne L_256_mod_inv_avx2_4_uv_start - vpsrad ymm5, ymm1, 26 - vpsrad ymm4, ymm0, 26 - vpermd ymm5, ymm13, ymm5 - vpand ymm0, ymm0, ymm14 - vpand ymm1, ymm1, ymm14 - vpaddd ymm0, ymm0, ymm5 - vpaddd ymm1, ymm1, ymm4 - vpsrad ymm5, ymm1, 26 - vpsrad ymm4, ymm0, 26 - vpermd ymm5, ymm13, ymm5 - vpand ymm0, ymm0, ymm14 - vpand ymm1, ymm1, ymm14 - vpaddd ymm0, ymm0, ymm5 - vpaddd ymm1, ymm1, ymm4 vpextrd eax, 
xmm0, 0 vpextrd r10d, xmm0, 1 vpextrd r12d, xmm0, 2 vpextrd r14d, xmm0, 3 - vextracti128 xmm0, ymm0, 1 vpextrd r9d, xmm1, 0 vpextrd r11d, xmm1, 1 vpextrd r13d, xmm1, 2 vpextrd r15d, xmm1, 3 + vextracti128 xmm0, ymm0, 1 vextracti128 xmm1, ymm1, 1 vpextrd edi, xmm0, 0 vpextrd esi, xmm1, 0 @@ -57758,48 +57744,104 @@ L_256_mod_inv_avx2_4_vsubu_sub_shr1: jne L_256_mod_inv_avx2_4_uv_start or rdx, r15 jne L_256_mod_inv_avx2_4_uv_start - vpsrad ymm5, ymm3, 26 - vpsrad ymm4, ymm2, 26 - vpermd ymm5, ymm13, ymm5 - vpand ymm2, ymm2, ymm14 - vpand ymm3, ymm3, ymm14 - vpaddd ymm2, ymm2, ymm5 - vpaddd ymm3, ymm3, ymm4 - vpsrad ymm5, ymm3, 26 - vpsrad ymm4, ymm2, 26 - vpermd ymm5, ymm13, ymm5 - vpand ymm2, ymm2, ymm14 - vpand ymm3, ymm3, ymm14 - vpaddd ymm2, ymm2, ymm5 - vpaddd ymm3, ymm3, ymm4 vpextrd eax, xmm2, 0 vpextrd r10d, xmm2, 1 vpextrd r12d, xmm2, 2 vpextrd r14d, xmm2, 3 - vextracti128 xmm2, ymm2, 1 vpextrd r9d, xmm3, 0 vpextrd r11d, xmm3, 1 vpextrd r13d, xmm3, 2 vpextrd r15d, xmm3, 3 + vextracti128 xmm2, ymm2, 1 vextracti128 xmm3, ymm3, 1 vpextrd edi, xmm2, 0 vpextrd esi, xmm3, 0 L_256_mod_inv_avx2_4_store_done: - movsxd rax, eax + mov edx, eax + and eax, 67108863 + sar edx, 26 + add r9d, edx + mov edx, r9d + and r9d, 67108863 + sar edx, 26 + add r10d, edx + mov edx, r10d + and r10d, 67108863 + sar edx, 26 + add r11d, edx + mov edx, r11d + and r11d, 67108863 + sar edx, 26 + add r12d, edx + mov edx, r12d + and r12d, 67108863 + sar edx, 26 + add r13d, edx + mov edx, r13d + and r13d, 67108863 + sar edx, 26 + add r14d, edx + mov edx, r14d + and r14d, 67108863 + sar edx, 26 + add r15d, edx + mov edx, r15d + and r15d, 67108863 + sar edx, 26 + add edi, edx + mov edx, edi + and edi, 67108863 + sar edx, 26 + add esi, edx + movsxd r9, r9d + movsxd r11, r11d + movsxd r13, r13d + movsxd r15, r15d + movsxd rsi, esi shl r9, 26 + shl r11, 26 + shl r13, 26 + shl r15, 26 + shl rsi, 26 + movsxd rax, eax add rax, r9 movsxd r10, r10d - shl r11, 26 - add r10, r11 + adc r10, r11 movsxd r12, 
r12d - shl r13, 26 - add r12, r13 + adc r12, r13 movsxd r14, r14d - shl r15, 26 - add r14, r15 + adc r14, r15 movsxd rdi, edi - shl rsi, 26 + adc rdi, rsi + jge L_256_mod_inv_avx2_4_3_no_add_order + mov r9, 2756213597218129 + mov r11, 3054930678533947 + mov r13, 4503599622973178 + mov r15, 68719476735 + mov rsi, 281474976645120 + add rax, r9 + add r10, r11 + add r12, r13 + add r14, r15 add rdi, rsi + mov rdx, 4503599627370495 + mov r9, rax + and rax, rdx + sar r9, 52 + add r10, r9 + mov r11, r10 + and r10, rdx + sar r11, 52 + add r12, r11 + mov r13, r12 + and r12, rdx + sar r13, 52 + add r14, r13 + mov r15, r14 + and r14, rdx + sar r15, 52 + add rdi, r15 +L_256_mod_inv_avx2_4_3_no_add_order: mov r9, r10 mov r11, r12 mov r13, r14