From 7915f6acb04aad90337ca2eaf8e7172bfe33c1fb Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Wed, 20 Oct 2021 12:01:50 -0500 Subject: [PATCH] linuxkm: add the remainder of known needed SAVE_VECTOR_REGISTERS() wrappers to PK algs, add DEBUG_VECTOR_REGISTERS_{EXIT,ABORT}_ON_FAIL options; add a slew of ASSERT_SAVED_VECTOR_REGISTERS() to sp_x86_64.c (autogenerated, separate scripts commit to follow). --- wolfcrypt/src/dh.c | 24 ++++-- wolfcrypt/src/dsa.c | 8 ++ wolfcrypt/src/ecc.c | 65 +++++++++++++--- wolfcrypt/src/eccsi.c | 32 +++++--- wolfcrypt/src/sakke.c | 20 +++++ wolfcrypt/src/sp_int.c | 23 +++++- wolfcrypt/src/sp_x86_64.c | 159 ++++++++++++++++++++++++++++++++++++++ wolfssl/wolfcrypt/types.h | 13 ++++ 8 files changed, 315 insertions(+), 29 deletions(-) diff --git a/wolfcrypt/src/dh.c b/wolfcrypt/src/dh.c index 32f8faac6..fc78901ed 100644 --- a/wolfcrypt/src/dh.c +++ b/wolfcrypt/src/dh.c @@ -1495,6 +1495,8 @@ static int _ffc_validate_public_key(DhKey* key, const byte* pub, word32 pubSz, return MP_INIT_E; } + SAVE_VECTOR_REGISTERS(ret = _svr_ret;); + if (mp_read_unsigned_bin(y, pub, pubSz) != MP_OKAY) { ret = MP_READ_E; } @@ -1582,6 +1584,9 @@ static int _ffc_validate_public_key(DhKey* key, const byte* pub, word32 pubSz, mp_clear(y); mp_clear(p); mp_clear(q); + + RESTORE_VECTOR_REGISTERS(); + #ifdef WOLFSSL_SMALL_STACK XFREE(q, key->heap, DYNAMIC_TYPE_DH); XFREE(p, key->heap, DYNAMIC_TYPE_DH); @@ -1815,6 +1820,8 @@ static int _ffc_pairwise_consistency_test(DhKey* key, return MP_INIT_E; } + SAVE_VECTOR_REGISTERS(ret = _svr_ret;); + /* Load the private and public keys into big integers. */ if (mp_read_unsigned_bin(publicKey, pub, pubSz) != MP_OKAY || mp_read_unsigned_bin(privateKey, priv, privSz) != MP_OKAY) { @@ -1869,6 +1876,9 @@ static int _ffc_pairwise_consistency_test(DhKey* key, mp_forcezero(privateKey); mp_clear(publicKey); mp_clear(checkKey); + + RESTORE_VECTOR_REGISTERS(); + #ifdef WOLFSSL_SMALL_STACK XFREE(checkKey, key->heap, DYNAMIC_TYPE_DH); XFREE(privateKey, key->heap, DYNAMIC_TYPE_DH); @@ -1982,7 +1992,7 @@ static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz, if (mp_init(y) != MP_OKAY) return MP_INIT_E; - SAVE_VECTOR_REGISTERS(); + SAVE_VECTOR_REGISTERS(ret = _svr_ret;); if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY) ret = MP_READ_E; @@ -2009,7 +2019,7 @@ static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz, if (mp_init(y) != MP_OKAY) return MP_INIT_E; - SAVE_VECTOR_REGISTERS(); + SAVE_VECTOR_REGISTERS(ret = _svr_ret;); if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY) ret = MP_READ_E; @@ -2036,7 +2046,7 @@ static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz, if (mp_init(y) != MP_OKAY) return MP_INIT_E; - SAVE_VECTOR_REGISTERS(); + SAVE_VECTOR_REGISTERS(ret = _svr_ret;); if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY) ret = MP_READ_E; @@ -2070,7 +2080,7 @@ static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz, return MP_INIT_E; } - SAVE_VECTOR_REGISTERS(); + SAVE_VECTOR_REGISTERS(ret = _svr_ret;); if (mp_read_unsigned_bin(x, priv, privSz) != MP_OKAY) ret = MP_READ_E; @@ -2341,6 +2351,8 @@ static int _DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g, ret = BAD_FUNC_ARG; } + SAVE_VECTOR_REGISTERS(return _svr_ret;); + if (ret == 0) { /* may have leading 0 */ if (p[0] == 0) { @@ -2406,6 +2418,8 @@ static int _DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g, mp_clear(keyP); } + RESTORE_VECTOR_REGISTERS(); + return ret; } @@ -2848,7 +2862,7 @@ int wc_DhGenerateParams(WC_RNG *rng, int modSz, DhKey *dh) } #endif - SAVE_VECTOR_REGISTERS(); + SAVE_VECTOR_REGISTERS(ret = _svr_ret;); if (ret == 0) { /* force magnitude */ diff --git a/wolfcrypt/src/dsa.c b/wolfcrypt/src/dsa.c index b03a6e0f2..39ff55040 100644 --- a/wolfcrypt/src/dsa.c +++ b/wolfcrypt/src/dsa.c @@ -165,6 +165,8 @@ int wc_MakeDsaKey(WC_RNG *rng, DsaKey *dsa) return MEMORY_E; } + SAVE_VECTOR_REGISTERS(); + #ifdef WOLFSSL_SMALL_STACK if ((tmpQ = (mp_int *)XMALLOC(sizeof(*tmpQ), NULL, DYNAMIC_TYPE_WOLF_BIGINT)) == NULL) err = MEMORY_E; @@ -229,6 +231,8 @@ int wc_MakeDsaKey(WC_RNG *rng, DsaKey *dsa) mp_clear(tmpQ); #endif + RESTORE_VECTOR_REGISTERS(); + return err; } @@ -660,6 +664,8 @@ int wc_DsaSign(const byte* digest, byte* out, DsaKey* key, WC_RNG* rng) int ret = 0, halfSz = 0; byte* tmp; /* initial output pointer */ + SAVE_VECTOR_REGISTERS(return _svr_ret;); + do { if (digest == NULL || out == NULL || key == NULL || rng == NULL) { ret = BAD_FUNC_ARG; @@ -919,6 +925,8 @@ int wc_DsaSign(const byte* digest, byte* out, DsaKey* key, WC_RNG* rng) } } while (0); + RESTORE_VECTOR_REGISTERS(); + #ifdef WOLFSSL_SMALL_STACK if (k) { if (ret != MP_INIT_E) diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index ea34ff9d6..766122a6e 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -4241,6 +4241,8 @@ int wc_ecc_shared_secret_ex(ecc_key* private_key, ecc_point* point, return ECC_BAD_ARG_E; } + SAVE_VECTOR_REGISTERS(return _svr_ret;); + switch(private_key->state) { case ECC_STATE_NONE: case ECC_STATE_SHARED_SEC_GEN: @@ -4273,6 +4275,8 @@ int wc_ecc_shared_secret_ex(ecc_key* private_key, ecc_point* point, err = BAD_STATE_E; } /* switch */ + RESTORE_VECTOR_REGISTERS(); + WOLFSSL_LEAVE("wc_ecc_shared_secret_ex", err); /* if async pending then return and skip done cleanup below */ @@ -4333,6 +4337,8 @@ int wc_ecc_point_is_on_curve(ecc_point *p, int curve_idx) return ECC_BAD_ARG_E; } + SAVE_VECTOR_REGISTERS(return _svr_ret;); + ALLOC_CURVE_SPECS(3, err); if (err == MP_OKAY) { err = wc_ecc_curve_load(wc_ecc_get_curve_params(curve_idx), &curve, @@ -4346,6 +4352,8 @@ int wc_ecc_point_is_on_curve(ecc_point *p, int curve_idx) wc_ecc_curve_free(curve); FREE_CURVE_SPECS(); + RESTORE_VECTOR_REGISTERS(); + return err; } #endif /* USE_ECC_B_PARAM */ @@ -5744,8 +5752,14 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng, #endif #endif /* WC_ECC_NONBLOCK */ #if !defined(WC_ECC_NONBLOCK) || (defined(WC_ECC_NONBLOCK) && !defined(WC_ECC_NONBLOCK_ONLY)) - return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, sign_k, - key->heap); + { + int ret; + SAVE_VECTOR_REGISTERS(return _svr_ret;); + ret = sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, sign_k, + key->heap); + RESTORE_VECTOR_REGISTERS(); + return ret; + } #endif } #endif @@ -5765,8 +5779,14 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng, #endif #endif /* WC_ECC_NONBLOCK */ #if !defined(WC_ECC_NONBLOCK) || (defined(WC_ECC_NONBLOCK) && !defined(WC_ECC_NONBLOCK_ONLY)) - return sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, sign_k, - key->heap); + { + int ret; + SAVE_VECTOR_REGISTERS(return _svr_ret;); + ret = sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, sign_k, + key->heap); + RESTORE_VECTOR_REGISTERS(); + return ret; + } #endif } #endif @@ -7256,8 +7276,14 @@ int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash, #endif #endif /* WC_ECC_NONBLOCK */ #if !defined(WC_ECC_NONBLOCK) || (defined(WC_ECC_NONBLOCK) && !defined(WC_ECC_NONBLOCK_ONLY)) - return sp_ecc_verify_256(hash, hashlen, key->pubkey.x, - key->pubkey.y, key->pubkey.z, r, s, res, key->heap); + { + int ret; + SAVE_VECTOR_REGISTERS(return _svr_ret;); + ret = sp_ecc_verify_256(hash, hashlen, key->pubkey.x, + key->pubkey.y, key->pubkey.z, r, s, res, key->heap); + RESTORE_VECTOR_REGISTERS(); + return ret; + } #endif } #endif @@ -7279,8 +7305,14 @@ int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash, #endif #endif /* WC_ECC_NONBLOCK */ #if !defined(WC_ECC_NONBLOCK) || (defined(WC_ECC_NONBLOCK) && !defined(WC_ECC_NONBLOCK_ONLY)) - return sp_ecc_verify_384(hash, hashlen, key->pubkey.x, - key->pubkey.y, key->pubkey.z, r, s, res, key->heap); + { + int ret; + SAVE_VECTOR_REGISTERS(return _svr_ret;); + ret = sp_ecc_verify_384(hash, hashlen, key->pubkey.x, + key->pubkey.y, key->pubkey.z, r, s, res, key->heap); + RESTORE_VECTOR_REGISTERS(); + return ret; + } #endif } #endif @@ -7589,6 +7621,8 @@ int wc_ecc_import_point_der_ex(const byte* in, word32 inLen, if (err != MP_OKAY) return MEMORY_E; + SAVE_VECTOR_REGISTERS(return _svr_ret;); + /* check for point type (4, 2, or 3) */ pointType = in[0]; if (pointType != ECC_POINT_UNCOMP && pointType != ECC_POINT_COMP_EVEN && @@ -7734,6 +7768,8 @@ int wc_ecc_import_point_der_ex(const byte* in, word32 inLen, mp_clear(point->z); } + RESTORE_VECTOR_REGISTERS(); + return err; } @@ -8049,6 +8085,8 @@ int wc_ecc_is_point(ecc_point* ecp, mp_int* a, mp_int* b, mp_int* prime) return err; } + SAVE_VECTOR_REGISTERS(err = _svr_ret;); + /* compute y^2 */ if (err == MP_OKAY) err = mp_sqr(ecp->y, t1); @@ -8116,6 +8154,9 @@ int wc_ecc_is_point(ecc_point* ecp, mp_int* a, mp_int* b, mp_int* prime) mp_clear(t1); mp_clear(t2); + + RESTORE_VECTOR_REGISTERS(); + #ifdef WOLFSSL_SMALL_STACK XFREE(t2, NULL, DYNAMIC_TYPE_ECC); XFREE(t1, NULL, DYNAMIC_TYPE_ECC); @@ -10934,7 +10975,7 @@ int ecc_mul2add(ecc_point* A, mp_int* kA, } #endif /* HAVE_THREAD_LS */ - SAVE_VECTOR_REGISTERS(); + SAVE_VECTOR_REGISTERS(err = _svr_ret;); /* find point */ idx1 = find_base(A); @@ -11789,7 +11830,7 @@ int wc_ecc_encrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg, } #endif - SAVE_VECTOR_REGISTERS(); + SAVE_VECTOR_REGISTERS(ret = _svr_ret;); #ifdef WOLFSSL_ECIES_ISO18033 XMEMCPY(sharedSecret, out - pubKeySz, pubKeySz); @@ -12064,7 +12105,7 @@ int wc_ecc_decrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg, } #endif - SAVE_VECTOR_REGISTERS(); + SAVE_VECTOR_REGISTERS(ret = _svr_ret;); #ifndef WOLFSSL_ECIES_OLD if (pubKey == NULL) { @@ -12283,7 +12324,7 @@ int mp_jacobi(mp_int* a, mp_int* n, int* c) return res; } - SAVE_VECTOR_REGISTERS(); + SAVE_VECTOR_REGISTERS(return _svr_ret;); if ((res = mp_mod(a, n, &a1)) != MP_OKAY) { goto done; diff --git a/wolfcrypt/src/eccsi.c b/wolfcrypt/src/eccsi.c index a059c916e..1fed1833f 100644 --- a/wolfcrypt/src/eccsi.c +++ b/wolfcrypt/src/eccsi.c @@ -1490,14 +1490,17 @@ int wc_ValidateEccsiPair(EccsiKey* key, enum wc_HashType hashType, err = BAD_STATE_E; } - if (err == 0) { - params = &key->params; + if (err != 0) + return err; - hs = &key->tmp; - res = &key->pubkey.pubkey; + SAVE_VECTOR_REGISTERS(return _svr_ret;); + + params = &key->params; + hs = &key->tmp; + res = &key->pubkey.pubkey; + + err = eccsi_load_base(key); - err = eccsi_load_base(key); - } if (err == 0) { err = eccsi_load_ecc_params(key); } @@ -1546,6 +1549,8 @@ int wc_ValidateEccsiPair(EccsiKey* key, enum wc_HashType hashType, } } + RESTORE_VECTOR_REGISTERS(); + return err; } @@ -2172,12 +2177,15 @@ int wc_VerifyEccsiHash(EccsiKey* key, enum wc_HashType hashType, err = BAD_STATE_E; } + if (err != 0) + return err; + + SAVE_VECTOR_REGISTERS(return _svr_ret;); + /* Decode the signature into components. */ - if (err == 0) { - r = &key->pubkey.k; - pvt = &key->pubkey.pubkey; - err = eccsi_decode_sig_r_pvt(key, sig, sigSz, r, pvt); - } + r = &key->pubkey.k; + pvt = &key->pubkey.pubkey; + err = eccsi_decode_sig_r_pvt(key, sig, sigSz, r, pvt); /* Load the curve parameters for operations */ if (err == 0) { @@ -2236,6 +2244,8 @@ int wc_VerifyEccsiHash(EccsiKey* key, enum wc_HashType hashType, *verified = ((err == 0) && (mp_cmp(jx, r) == MP_EQ)); } + RESTORE_VECTOR_REGISTERS(); + return err; } #endif /* WOLFCRYPT_ECCSI_CLIENT */ diff --git a/wolfcrypt/src/sakke.c b/wolfcrypt/src/sakke.c index 8d95f006b..ab5054538 100644 --- a/wolfcrypt/src/sakke.c +++ b/wolfcrypt/src/sakke.c @@ -1301,11 +1301,13 @@ int wc_GenerateSakkeRskTable(const SakkeKey* key, const ecc_point* rsk, err = BAD_FUNC_ARG; } if (err == 0) { + SAVE_VECTOR_REGISTERS(return _svr_ret;); #ifdef WOLFSSL_SP_1024 err = sp_Pairing_gen_precomp_1024(rsk, table, len); #else err = NOT_COMPILED_IN; #endif + RESTORE_VECTOR_REGISTERS(); } return err; @@ -2343,6 +2345,8 @@ int wc_ValidateSakkeRsk(SakkeKey* key, const byte* id, word16 idSz, err = BAD_FUNC_ARG; } + SAVE_VECTOR_REGISTERS(return _svr_ret;); + /* Load elliptic curve parameters */ if (err == 0) { err = sakke_load_params(key); @@ -2378,6 +2382,8 @@ int wc_ValidateSakkeRsk(SakkeKey* key, const byte* id, word16 idSz, *valid = ((err == 0) && (mp_cmp(a, &key->params.g) == MP_EQ)); } + RESTORE_VECTOR_REGISTERS(); + return err; } @@ -6239,6 +6245,8 @@ int wc_MakeSakkePointI(SakkeKey* key, const byte* id, word16 idSz) err = BAD_FUNC_ARG; } + SAVE_VECTOR_REGISTERS(return _svr_ret;); + if (err == 0) { err = sakke_load_params(key); } @@ -6251,6 +6259,8 @@ int wc_MakeSakkePointI(SakkeKey* key, const byte* id, word16 idSz) key->i.idSz = idSz; } + RESTORE_VECTOR_REGISTERS(); + return err; } @@ -6378,7 +6388,9 @@ int wc_GenerateSakkePointITable(SakkeKey* key, byte* table, word32* len) #ifdef WOLFSSL_HAVE_SP_ECC if (err == 0) { + SAVE_VECTOR_REGISTERS(return _svr_ret;); err = sp_ecc_gen_table_1024(key->i.i, table, len, key->heap); + RESTORE_VECTOR_REGISTERS(); } if (err == 0) { key->i.table = table; @@ -6566,6 +6578,8 @@ int wc_MakeSakkeEncapsulatedSSV(SakkeKey* key, enum wc_HashType hashType, err = BAD_STATE_E; } + SAVE_VECTOR_REGISTERS(return _svr_ret;); + /* Load parameters */ if (err == 0) { err = sakke_load_params(key); @@ -6634,6 +6648,8 @@ int wc_MakeSakkeEncapsulatedSSV(SakkeKey* key, enum wc_HashType hashType, /* Step 6: Output SSV - already encoded in buffer */ + RESTORE_VECTOR_REGISTERS(); + return err; } @@ -6739,6 +6755,8 @@ int wc_DeriveSakkeSSV(SakkeKey* key, enum wc_HashType hashType, byte* ssv, err = BAD_STATE_E; } + SAVE_VECTOR_REGISTERS(return _svr_ret;); + /* Load parameters */ if (err == 0) { err = sakke_load_params(key); @@ -6803,6 +6821,8 @@ int wc_DeriveSakkeSSV(SakkeKey* key, enum wc_HashType hashType, byte* ssv, err = SAKKE_VERIFY_FAIL_E; } + RESTORE_VECTOR_REGISTERS(); + return err; } #endif /* WOLFCRYPT_SAKKE_CLIENT */ diff --git a/wolfcrypt/src/sp_int.c b/wolfcrypt/src/sp_int.c index 55618c267..8d6541319 100644 --- a/wolfcrypt/src/sp_int.c +++ b/wolfcrypt/src/sp_int.c @@ -8837,9 +8837,11 @@ int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r) if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) { err = MP_VAL; } + SAVE_VECTOR_REGISTERS(err = _svr_ret;); if (err == MP_OKAY) { err = sp_exptmod_ex(b, e, e->used, m, r); } + RESTORE_VECTOR_REGISTERS(); return err; } #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || @@ -13254,6 +13256,8 @@ int sp_prime_is_prime(sp_int* a, int t, int* result) haveRes = 1; } + SAVE_VECTOR_REGISTERS(err = _svr_ret;); + if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) { /* check against primes table */ for (i = 0; i < SP_PRIME_SIZE; i++) { @@ -13292,6 +13296,8 @@ int sp_prime_is_prime(sp_int* a, int t, int* result) } } + RESTORE_VECTOR_REGISTERS(); + FREE_SP_INT(b, NULL); return err; } @@ -13333,6 +13339,8 @@ int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng) haveRes = 1; } + SAVE_VECTOR_REGISTERS(err = _svr_ret;); + if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) { /* check against primes table */ for (i = 0; i < SP_PRIME_SIZE; i++) { @@ -13424,6 +13432,9 @@ int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng) if (result != NULL) { *result = ret; } + + RESTORE_VECTOR_REGISTERS(); + return err; } #endif /* WOLFSSL_SP_PRIME_GEN */ @@ -13468,6 +13479,8 @@ int sp_gcd(sp_int* a, sp_int* b, sp_int* r) int used = (a->used >= b->used) ? a->used + 1 : b->used + 1; DECL_SP_INT_ARRAY(d, used, 3); + SAVE_VECTOR_REGISTERS(err = _svr_ret;); + ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL); if (err == MP_OKAY) { u = d[0]; @@ -13532,6 +13545,8 @@ int sp_gcd(sp_int* a, sp_int* b, sp_int* r) } FREE_SP_INT_ARRAY(d, NULL); + + RESTORE_VECTOR_REGISTERS(); } return err; @@ -13577,7 +13592,11 @@ int sp_lcm(sp_int* a, sp_int* b, sp_int* r) sp_init_size(t[0], used); sp_init_size(t[1], used); - err = sp_gcd(a, b, t[0]); + SAVE_VECTOR_REGISTERS(err = _svr_ret;); + + if (err == MP_OKAY) + err = sp_gcd(a, b, t[0]); + if (err == MP_OKAY) { if (_sp_cmp_abs(a, b) == MP_GT) { err = sp_div(a, t[0], t[1], NULL); @@ -13592,6 +13611,8 @@ int sp_lcm(sp_int* a, sp_int* b, sp_int* r) } } } + + RESTORE_VECTOR_REGISTERS(); } FREE_SP_INT_ARRAY(t, NULL); diff --git a/wolfcrypt/src/sp_x86_64.c b/wolfcrypt/src/sp_x86_64.c index 91cd7c421..dc7863f67 100644 --- a/wolfcrypt/src/sp_x86_64.c +++ b/wolfcrypt/src/sp_x86_64.c @@ -260,6 +260,8 @@ static void sp_2048_mont_norm_16(sp_digit* r, const sp_digit* m) { XMEMSET(r, 0, sizeof(sp_digit) * 16); + ASSERT_SAVED_VECTOR_REGISTERS(); + /* r = 2^n mod m */ sp_2048_sub_in_place_16(r, m); } @@ -313,6 +315,7 @@ extern sp_digit div_2048_word_asm_16(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_2048_word_16(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -330,6 +333,7 @@ static WC_INLINE sp_digit div_2048_word_16(sp_digit d1, sp_digit d0, static WC_INLINE sp_digit div_2048_word_16(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); register sp_digit r asm("rax"); __asm__ __volatile__ ( "divq %3" @@ -392,6 +396,8 @@ static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_dig word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)m; div = d[15]; @@ -442,6 +448,7 @@ static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_dig static WC_INLINE int sp_2048_mod_16(sp_digit* r, const sp_digit* a, const sp_digit* m) { + ASSERT_SAVED_VECTOR_REGISTERS(); return sp_2048_div_16(a, m, NULL, r); } @@ -473,6 +480,8 @@ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (33 * 32) + 32, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -683,6 +692,8 @@ static int sp_2048_mod_exp_avx2_16(sp_digit* r, const sp_digit* a, const sp_digi byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (33 * 32) + 32, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -843,6 +854,8 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) { XMEMSET(r, 0, sizeof(sp_digit) * 32); + ASSERT_SAVED_VECTOR_REGISTERS(); + /* r = 2^n mod m */ sp_2048_sub_in_place_32(r, m); } @@ -896,6 +909,7 @@ extern sp_digit div_2048_word_asm_32(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -913,6 +927,7 @@ static WC_INLINE sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, static WC_INLINE sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); register sp_digit r asm("rax"); __asm__ __volatile__ ( "divq %3" @@ -944,6 +959,8 @@ static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, s word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)m; div = d[31]; @@ -998,6 +1015,7 @@ static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, s static WC_INLINE int sp_2048_mod_32_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) { + ASSERT_SAVED_VECTOR_REGISTERS(); return sp_2048_div_32_cond(a, m, NULL, r); } @@ -1055,6 +1073,8 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)m; div = d[31]; @@ -1106,6 +1126,7 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m) { + ASSERT_SAVED_VECTOR_REGISTERS(); return sp_2048_div_32(a, m, NULL, r); } @@ -1138,6 +1159,8 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (17 * 64) + 64, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -1332,6 +1355,8 @@ static int sp_2048_mod_exp_avx2_32(sp_digit* r, const sp_digit* a, const sp_digi byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (17 * 64) + 64, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -1492,6 +1517,8 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + if (*outLen < 256) { err = MP_TO_E; } @@ -1654,6 +1681,8 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, sp_digit* r; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)pm; (void)qm; (void)dpm; @@ -1762,6 +1791,8 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)dm; (void)mm; @@ -1972,6 +2003,8 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif int expBits = mp_count_bits(exp); + ASSERT_SAVED_VECTOR_REGISTERS(); + if (mp_count_bits(base) > 2048 || expBits > 2048 || mp_count_bits(mod) != 2048) { err = MP_READ_E; @@ -2057,6 +2090,8 @@ static int sp_2048_mod_exp_2_avx2_32(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (33 + 64), NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -2182,6 +2217,8 @@ static int sp_2048_mod_exp_2_32(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (33 + 64), NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -2313,6 +2350,8 @@ int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen, word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + if (mp_count_bits(base) > 2048 || expLen > 256 || mp_count_bits(mod) != 2048) { err = MP_READ_E; @@ -2415,6 +2454,8 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif int expBits = mp_count_bits(exp); + ASSERT_SAVED_VECTOR_REGISTERS(); + if (mp_count_bits(base) > 1024 || expBits > 1024 || mp_count_bits(mod) != 1024) { err = MP_READ_E; @@ -2684,6 +2725,8 @@ static void sp_3072_mont_norm_24(sp_digit* r, const sp_digit* m) { XMEMSET(r, 0, sizeof(sp_digit) * 24); + ASSERT_SAVED_VECTOR_REGISTERS(); + /* r = 2^n mod m */ sp_3072_sub_in_place_24(r, m); } @@ -2737,6 +2780,7 @@ extern sp_digit div_3072_word_asm_24(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_3072_word_24(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -2754,6 +2798,7 @@ static WC_INLINE sp_digit div_3072_word_24(sp_digit d1, sp_digit d0, static WC_INLINE sp_digit div_3072_word_24(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); register sp_digit r asm("rax"); __asm__ __volatile__ ( "divq %3" @@ -2816,6 +2861,8 @@ static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_dig word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)m; div = d[23]; @@ -2866,6 +2913,7 @@ static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_dig static WC_INLINE int sp_3072_mod_24(sp_digit* r, const sp_digit* a, const sp_digit* m) { + ASSERT_SAVED_VECTOR_REGISTERS(); return sp_3072_div_24(a, m, NULL, r); } @@ -2897,6 +2945,8 @@ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (33 * 48) + 48, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -3107,6 +3157,8 @@ static int sp_3072_mod_exp_avx2_24(sp_digit* r, const sp_digit* a, const sp_digi byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (33 * 48) + 48, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -3267,6 +3319,8 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) { XMEMSET(r, 0, sizeof(sp_digit) * 48); + ASSERT_SAVED_VECTOR_REGISTERS(); + /* r = 2^n mod m */ sp_3072_sub_in_place_48(r, m); } @@ -3320,6 +3374,7 @@ extern sp_digit div_3072_word_asm_48(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -3337,6 +3392,7 @@ static WC_INLINE sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, static WC_INLINE sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); register sp_digit r asm("rax"); __asm__ __volatile__ ( "divq %3" @@ -3368,6 +3424,8 @@ static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, s word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)m; div = d[47]; @@ -3422,6 +3480,7 @@ static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, s static WC_INLINE int sp_3072_mod_48_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) { + ASSERT_SAVED_VECTOR_REGISTERS(); return sp_3072_div_48_cond(a, m, NULL, r); } @@ -3479,6 +3538,8 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)m; div = d[47]; @@ -3530,6 +3591,7 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m) { + ASSERT_SAVED_VECTOR_REGISTERS(); return sp_3072_div_48(a, m, NULL, r); } @@ -3562,6 +3624,8 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (17 * 96) + 96, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -3756,6 +3820,8 @@ static int sp_3072_mod_exp_avx2_48(sp_digit* r, const sp_digit* a, const sp_digi byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (17 * 96) + 96, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -3916,6 +3982,8 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + if (*outLen < 384) { err = MP_TO_E; } @@ -4078,6 +4146,8 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, sp_digit* r; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)pm; (void)qm; (void)dpm; @@ -4186,6 +4256,8 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)dm; (void)mm; @@ -4396,6 +4468,8 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif int expBits = mp_count_bits(exp); + ASSERT_SAVED_VECTOR_REGISTERS(); + if (mp_count_bits(base) > 3072 || expBits > 3072 || mp_count_bits(mod) != 3072) { err = MP_READ_E; @@ -4481,6 +4555,8 @@ static int sp_3072_mod_exp_2_avx2_48(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (49 + 96), NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -4606,6 +4682,8 @@ static int sp_3072_mod_exp_2_48(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (49 + 96), NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -4737,6 +4815,8 @@ int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + if (mp_count_bits(base) > 3072 || expLen > 384 || mp_count_bits(mod) != 3072) { err = MP_READ_E; @@ -4839,6 +4919,8 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif int expBits = mp_count_bits(exp); + ASSERT_SAVED_VECTOR_REGISTERS(); + if (mp_count_bits(base) > 1536 || expBits > 1536 || mp_count_bits(mod) != 1536) { err = MP_READ_E; @@ -5089,6 +5171,8 @@ static void sp_4096_mont_norm_64(sp_digit* r, const sp_digit* m) { XMEMSET(r, 0, sizeof(sp_digit) * 64); + ASSERT_SAVED_VECTOR_REGISTERS(); + /* r = 2^n mod m */ sp_4096_sub_in_place_64(r, m); } @@ -5142,6 +5226,7 @@ extern sp_digit div_4096_word_asm_64(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_4096_word_64(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -5159,6 +5244,7 @@ static WC_INLINE sp_digit div_4096_word_64(sp_digit d1, sp_digit d0, static WC_INLINE sp_digit div_4096_word_64(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); register sp_digit r asm("rax"); __asm__ __volatile__ ( "divq %3" @@ -5190,6 +5276,8 @@ static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, s word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)m; div = d[63]; @@ -5244,6 +5332,7 @@ static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, s static WC_INLINE int sp_4096_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) { + ASSERT_SAVED_VECTOR_REGISTERS(); return sp_4096_div_64_cond(a, m, NULL, r); } @@ -5301,6 +5390,8 @@ static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_dig word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)m; div = d[63]; @@ -5352,6 +5443,7 @@ static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_dig static WC_INLINE int sp_4096_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m) { + ASSERT_SAVED_VECTOR_REGISTERS(); return sp_4096_div_64(a, m, NULL, r); } @@ -5384,6 +5476,8 @@ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (17 * 128) + 128, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -5578,6 +5672,8 @@ static int sp_4096_mod_exp_avx2_64(sp_digit* r, const sp_digit* a, const sp_digi byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (17 * 128) + 128, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -5738,6 +5834,8 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + if (*outLen < 512) { err = MP_TO_E; } @@ -5900,6 +5998,8 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, sp_digit* r; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)pm; (void)qm; (void)dpm; @@ -6008,6 +6108,8 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)dm; (void)mm; @@ -6218,6 +6320,8 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif int expBits = mp_count_bits(exp); + ASSERT_SAVED_VECTOR_REGISTERS(); + if (mp_count_bits(base) > 4096 || expBits > 4096 || mp_count_bits(mod) != 4096) { err = MP_READ_E; @@ -6303,6 +6407,8 @@ static int sp_4096_mod_exp_2_avx2_64(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (65 + 128), NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -6428,6 +6534,8 @@ static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; + ASSERT_SAVED_VECTOR_REGISTERS(); + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (65 + 128), NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -6559,6 +6667,8 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + if (mp_count_bits(base) > 4096 || expLen > 512 || mp_count_bits(mod) != 4096) { err = MP_READ_E; @@ -6736,6 +6846,8 @@ static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit* int64_t a32[8]; int64_t o; + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)m; a32[0] = a[0] & 0xffffffff; @@ -23241,6 +23353,7 @@ extern sp_digit div_256_word_asm_4(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_256_word_4(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -23258,6 +23371,7 @@ static WC_INLINE sp_digit div_256_word_4(sp_digit d1, sp_digit d0, static WC_INLINE sp_digit div_256_word_4(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); register sp_digit r asm("rax"); __asm__ __volatile__ ( "divq %3" @@ -23311,6 +23425,8 @@ static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)m; div = d[3]; @@ -23361,6 +23477,7 @@ static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit static WC_INLINE int sp_256_mod_4(sp_digit* r, const sp_digit* a, const sp_digit* m) { + ASSERT_SAVED_VECTOR_REGISTERS(); return sp_256_div_4(a, m, NULL, r); } @@ -23387,6 +23504,7 @@ static const uint64_t p256_order_low[2] = { */ static void sp_256_mont_mul_order_4(sp_digit* r, const sp_digit* a, const sp_digit* b) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_256_mul_4(r, a, b); sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order); } @@ -23398,6 +23516,7 @@ static void sp_256_mont_mul_order_4(sp_digit* r, const sp_digit* a, const sp_dig */ static void sp_256_mont_sqr_order_4(sp_digit* r, const sp_digit* a) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_256_sqr_4(r, a); sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order); } @@ -23413,6 +23532,8 @@ static void sp_256_mont_sqr_n_order_4(sp_digit* r, const sp_digit* a, int n) { int i; + ASSERT_SAVED_VECTOR_REGISTERS(); + sp_256_mont_sqr_order_4(r, a); for (i=1; i= sizeof(*sp_ctx) ? -1 : 1]; @@ -23497,6 +23621,8 @@ static void sp_256_mont_inv_order_4(sp_digit* r, const sp_digit* a, sp_digit* t3 = td + 4 * 4; int i; + ASSERT_SAVED_VECTOR_REGISTERS(); + /* t = a^2 */ sp_256_mont_sqr_order_4(t, a); /* t = a^3 = t * a */ @@ -23584,6 +23710,8 @@ static void sp_256_mont_sqr_n_order_avx2_4(sp_digit* r, const sp_digit* a, int n { int i; + ASSERT_SAVED_VECTOR_REGISTERS(); + sp_256_mont_sqr_order_avx2_4(r, a); for (i=1; i= sizeof(*sp_ctx) ? -1 : 1]; @@ -23668,6 +23799,8 @@ static void sp_256_mont_inv_order_avx2_4(sp_digit* r, const sp_digit* a, sp_digit* t3 = td + 4 * 4; int i; + ASSERT_SAVED_VECTOR_REGISTERS(); + /* t = a^2 */ sp_256_mont_sqr_order_avx2_4(t, a); /* t = a^3 = t * a */ @@ -25786,6 +25919,8 @@ static void sp_384_mont_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b, { sp_digit o; + ASSERT_SAVED_VECTOR_REGISTERS(); + o = sp_384_add_6(r, a, b); sp_384_cond_sub_6(r, r, m, 0 - o); } @@ -25801,6 +25936,8 @@ static void sp_384_mont_dbl_6(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o; + ASSERT_SAVED_VECTOR_REGISTERS(); + o = sp_384_dbl_6(r, a); sp_384_cond_sub_6(r, r, m, 0 - o); } @@ -25815,6 +25952,8 @@ static void sp_384_mont_tpl_6(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o; + ASSERT_SAVED_VECTOR_REGISTERS(); + o = sp_384_dbl_6(r, a); sp_384_cond_sub_6(r, r, m, 0 - o); o = sp_384_add_6(r, r, a); @@ -25834,6 +25973,8 @@ static void sp_384_mont_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b, { sp_digit o; + ASSERT_SAVED_VECTOR_REGISTERS(); + o = sp_384_sub_6(r, a, b); sp_384_cond_add_6(r, r, m, o); } @@ -47801,6 +47942,7 @@ extern sp_digit div_384_word_asm_6(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_384_word_6(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -47818,6 +47960,7 @@ static WC_INLINE sp_digit div_384_word_6(sp_digit d1, sp_digit d0, static WC_INLINE sp_digit div_384_word_6(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); register sp_digit r asm("rax"); __asm__ __volatile__ ( "divq %3" @@ -47873,6 +48016,8 @@ static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)m; div = d[5]; @@ -47923,6 +48068,7 @@ static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit static WC_INLINE int sp_384_mod_6(sp_digit* r, const sp_digit* a, const sp_digit* m) { + ASSERT_SAVED_VECTOR_REGISTERS(); return sp_384_div_6(a, m, NULL, r); } @@ -49939,6 +50085,7 @@ extern sp_digit div_1024_word_asm_16(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_1024_word_16(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -49956,6 +50103,7 @@ static WC_INLINE sp_digit div_1024_word_16(sp_digit d1, sp_digit d0, static WC_INLINE sp_digit div_1024_word_16(sp_digit d1, sp_digit d0, sp_digit div) { + ASSERT_SAVED_VECTOR_REGISTERS(); register sp_digit r asm("rax"); __asm__ __volatile__ ( "divq %3" @@ -50018,6 +50166,8 @@ static WC_INLINE int sp_1024_div_16(const sp_digit* a, const sp_digit* d, sp_dig word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + (void)m; div = d[15]; @@ -50068,6 +50218,7 @@ static WC_INLINE int sp_1024_div_16(const sp_digit* a, const sp_digit* d, sp_dig static WC_INLINE int sp_1024_mod_16(sp_digit* r, const sp_digit* a, const sp_digit* m) { + ASSERT_SAVED_VECTOR_REGISTERS(); return sp_1024_div_16(a, m, NULL, r); } @@ -59111,6 +59262,8 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { err = sp_ModExp_Fp_star_avx2_1024(base, exp, res); @@ -60719,6 +60872,8 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { err = sp_Pairing_avx2_1024(pm, qm, res); @@ -61850,6 +62005,8 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, word32* len) word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { err = sp_Pairing_gen_precomp_avx2_1024(pm, table, len); @@ -61884,6 +62041,8 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, mp_int* re word32 cpuid_flags = cpuid_get_flags(); #endif + ASSERT_SAVED_VECTOR_REGISTERS(); + #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { err = sp_Pairing_precomp_avx2_1024(pm, qm, res, table, len); diff --git a/wolfssl/wolfcrypt/types.h b/wolfssl/wolfcrypt/types.h index 7220aa39b..d6a5e9148 100644 --- a/wolfssl/wolfcrypt/types.h +++ b/wolfssl/wolfcrypt/types.h @@ -1183,6 +1183,15 @@ decouple library dependencies with standard string, memory and so on. WOLFSSL_API extern THREAD_LS_T int wc_svr_count; WOLFSSL_API extern THREAD_LS_T const char *wc_svr_last_file; WOLFSSL_API extern THREAD_LS_T int wc_svr_last_line; + + #ifdef DEBUG_VECTOR_REGISTERS_ABORT_ON_FAIL + #define DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE abort(); + #elif defined(DEBUG_VECTOR_REGISTERS_EXIT_ON_FAIL) + #define DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE exit(1); + #else + #define DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE + #endif + #define SAVE_VECTOR_REGISTERS(...) { \ ++wc_svr_count; \ if (wc_svr_count > 5) { \ @@ -1193,6 +1202,7 @@ decouple library dependencies with standard string, memory and so on. wc_svr_count, \ wc_svr_last_file, \ wc_svr_last_line); \ + DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE \ } \ wc_svr_last_file = __FILE__; \ wc_svr_last_line = __LINE__; \ @@ -1206,6 +1216,7 @@ decouple library dependencies with standard string, memory and so on. wc_svr_count, \ wc_svr_last_file, \ wc_svr_last_line); \ + DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE \ { fail_clause } \ } \ } @@ -1218,6 +1229,7 @@ decouple library dependencies with standard string, memory and so on. wc_svr_count, \ wc_svr_last_file, \ wc_svr_last_line); \ + DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE \ { fail_clause } \ } \ } @@ -1231,6 +1243,7 @@ decouple library dependencies with standard string, memory and so on. wc_svr_count, \ wc_svr_last_file, \ wc_svr_last_line); \ + DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE \ } \ wc_svr_last_file = __FILE__; \ wc_svr_last_line = __LINE__; \