linuxkm: add the remainder of known needed SAVE_VECTOR_REGISTERS() wrappers to PK algs, add DEBUG_VECTOR_REGISTERS_{EXIT,ABORT}_ON_FAIL options; add a slew of ASSERT_SAVED_VECTOR_REGISTERS() to sp_x86_64.c (autogenerated, separate scripts commit to follow).

This commit is contained in:
Daniel Pouzzner
2021-10-20 12:01:50 -05:00
parent 75df6508e6
commit 7915f6acb0
8 changed files with 315 additions and 29 deletions

View File

@ -1495,6 +1495,8 @@ static int _ffc_validate_public_key(DhKey* key, const byte* pub, word32 pubSz,
return MP_INIT_E;
}
SAVE_VECTOR_REGISTERS(ret = _svr_ret;);
if (mp_read_unsigned_bin(y, pub, pubSz) != MP_OKAY) {
ret = MP_READ_E;
}
@ -1582,6 +1584,9 @@ static int _ffc_validate_public_key(DhKey* key, const byte* pub, word32 pubSz,
mp_clear(y);
mp_clear(p);
mp_clear(q);
RESTORE_VECTOR_REGISTERS();
#ifdef WOLFSSL_SMALL_STACK
XFREE(q, key->heap, DYNAMIC_TYPE_DH);
XFREE(p, key->heap, DYNAMIC_TYPE_DH);
@ -1815,6 +1820,8 @@ static int _ffc_pairwise_consistency_test(DhKey* key,
return MP_INIT_E;
}
SAVE_VECTOR_REGISTERS(ret = _svr_ret;);
/* Load the private and public keys into big integers. */
if (mp_read_unsigned_bin(publicKey, pub, pubSz) != MP_OKAY ||
mp_read_unsigned_bin(privateKey, priv, privSz) != MP_OKAY) {
@ -1869,6 +1876,9 @@ static int _ffc_pairwise_consistency_test(DhKey* key,
mp_forcezero(privateKey);
mp_clear(publicKey);
mp_clear(checkKey);
RESTORE_VECTOR_REGISTERS();
#ifdef WOLFSSL_SMALL_STACK
XFREE(checkKey, key->heap, DYNAMIC_TYPE_DH);
XFREE(privateKey, key->heap, DYNAMIC_TYPE_DH);
@ -1982,7 +1992,7 @@ static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz,
if (mp_init(y) != MP_OKAY)
return MP_INIT_E;
SAVE_VECTOR_REGISTERS();
SAVE_VECTOR_REGISTERS(ret = _svr_ret;);
if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY)
ret = MP_READ_E;
@ -2009,7 +2019,7 @@ static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz,
if (mp_init(y) != MP_OKAY)
return MP_INIT_E;
SAVE_VECTOR_REGISTERS();
SAVE_VECTOR_REGISTERS(ret = _svr_ret;);
if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY)
ret = MP_READ_E;
@ -2036,7 +2046,7 @@ static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz,
if (mp_init(y) != MP_OKAY)
return MP_INIT_E;
SAVE_VECTOR_REGISTERS();
SAVE_VECTOR_REGISTERS(ret = _svr_ret;);
if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY)
ret = MP_READ_E;
@ -2070,7 +2080,7 @@ static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz,
return MP_INIT_E;
}
SAVE_VECTOR_REGISTERS();
SAVE_VECTOR_REGISTERS(ret = _svr_ret;);
if (mp_read_unsigned_bin(x, priv, privSz) != MP_OKAY)
ret = MP_READ_E;
@ -2341,6 +2351,8 @@ static int _DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
ret = BAD_FUNC_ARG;
}
SAVE_VECTOR_REGISTERS(return _svr_ret;);
if (ret == 0) {
/* may have leading 0 */
if (p[0] == 0) {
@ -2406,6 +2418,8 @@ static int _DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
mp_clear(keyP);
}
RESTORE_VECTOR_REGISTERS();
return ret;
}
@ -2848,7 +2862,7 @@ int wc_DhGenerateParams(WC_RNG *rng, int modSz, DhKey *dh)
}
#endif
SAVE_VECTOR_REGISTERS();
SAVE_VECTOR_REGISTERS(ret = _svr_ret;);
if (ret == 0) {
/* force magnitude */

View File

@ -165,6 +165,8 @@ int wc_MakeDsaKey(WC_RNG *rng, DsaKey *dsa)
return MEMORY_E;
}
SAVE_VECTOR_REGISTERS();
#ifdef WOLFSSL_SMALL_STACK
if ((tmpQ = (mp_int *)XMALLOC(sizeof(*tmpQ), NULL, DYNAMIC_TYPE_WOLF_BIGINT)) == NULL)
err = MEMORY_E;
@ -229,6 +231,8 @@ int wc_MakeDsaKey(WC_RNG *rng, DsaKey *dsa)
mp_clear(tmpQ);
#endif
RESTORE_VECTOR_REGISTERS();
return err;
}
@ -660,6 +664,8 @@ int wc_DsaSign(const byte* digest, byte* out, DsaKey* key, WC_RNG* rng)
int ret = 0, halfSz = 0;
byte* tmp; /* initial output pointer */
SAVE_VECTOR_REGISTERS(return _svr_ret;);
do {
if (digest == NULL || out == NULL || key == NULL || rng == NULL) {
ret = BAD_FUNC_ARG;
@ -919,6 +925,8 @@ int wc_DsaSign(const byte* digest, byte* out, DsaKey* key, WC_RNG* rng)
}
} while (0);
RESTORE_VECTOR_REGISTERS();
#ifdef WOLFSSL_SMALL_STACK
if (k) {
if (ret != MP_INIT_E)

View File

@ -4241,6 +4241,8 @@ int wc_ecc_shared_secret_ex(ecc_key* private_key, ecc_point* point,
return ECC_BAD_ARG_E;
}
SAVE_VECTOR_REGISTERS(return _svr_ret;);
switch(private_key->state) {
case ECC_STATE_NONE:
case ECC_STATE_SHARED_SEC_GEN:
@ -4273,6 +4275,8 @@ int wc_ecc_shared_secret_ex(ecc_key* private_key, ecc_point* point,
err = BAD_STATE_E;
} /* switch */
RESTORE_VECTOR_REGISTERS();
WOLFSSL_LEAVE("wc_ecc_shared_secret_ex", err);
/* if async pending then return and skip done cleanup below */
@ -4333,6 +4337,8 @@ int wc_ecc_point_is_on_curve(ecc_point *p, int curve_idx)
return ECC_BAD_ARG_E;
}
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ALLOC_CURVE_SPECS(3, err);
if (err == MP_OKAY) {
err = wc_ecc_curve_load(wc_ecc_get_curve_params(curve_idx), &curve,
@ -4346,6 +4352,8 @@ int wc_ecc_point_is_on_curve(ecc_point *p, int curve_idx)
wc_ecc_curve_free(curve);
FREE_CURVE_SPECS();
RESTORE_VECTOR_REGISTERS();
return err;
}
#endif /* USE_ECC_B_PARAM */
@ -5744,8 +5752,14 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
#endif
#endif /* WC_ECC_NONBLOCK */
#if !defined(WC_ECC_NONBLOCK) || (defined(WC_ECC_NONBLOCK) && !defined(WC_ECC_NONBLOCK_ONLY))
return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, sign_k,
key->heap);
{
int ret;
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ret = sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, sign_k,
key->heap);
RESTORE_VECTOR_REGISTERS();
return ret;
}
#endif
}
#endif
@ -5765,8 +5779,14 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
#endif
#endif /* WC_ECC_NONBLOCK */
#if !defined(WC_ECC_NONBLOCK) || (defined(WC_ECC_NONBLOCK) && !defined(WC_ECC_NONBLOCK_ONLY))
return sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, sign_k,
key->heap);
{
int ret;
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ret = sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, sign_k,
key->heap);
RESTORE_VECTOR_REGISTERS();
return ret;
}
#endif
}
#endif
@ -7256,8 +7276,14 @@ int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash,
#endif
#endif /* WC_ECC_NONBLOCK */
#if !defined(WC_ECC_NONBLOCK) || (defined(WC_ECC_NONBLOCK) && !defined(WC_ECC_NONBLOCK_ONLY))
return sp_ecc_verify_256(hash, hashlen, key->pubkey.x,
key->pubkey.y, key->pubkey.z, r, s, res, key->heap);
{
int ret;
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ret = sp_ecc_verify_256(hash, hashlen, key->pubkey.x,
key->pubkey.y, key->pubkey.z, r, s, res, key->heap);
RESTORE_VECTOR_REGISTERS();
return ret;
}
#endif
}
#endif
@ -7279,8 +7305,14 @@ int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash,
#endif
#endif /* WC_ECC_NONBLOCK */
#if !defined(WC_ECC_NONBLOCK) || (defined(WC_ECC_NONBLOCK) && !defined(WC_ECC_NONBLOCK_ONLY))
return sp_ecc_verify_384(hash, hashlen, key->pubkey.x,
key->pubkey.y, key->pubkey.z, r, s, res, key->heap);
{
int ret;
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ret = sp_ecc_verify_384(hash, hashlen, key->pubkey.x,
key->pubkey.y, key->pubkey.z, r, s, res, key->heap);
RESTORE_VECTOR_REGISTERS();
return ret;
}
#endif
}
#endif
@ -7589,6 +7621,8 @@ int wc_ecc_import_point_der_ex(const byte* in, word32 inLen,
if (err != MP_OKAY)
return MEMORY_E;
SAVE_VECTOR_REGISTERS(return _svr_ret;);
/* check for point type (4, 2, or 3) */
pointType = in[0];
if (pointType != ECC_POINT_UNCOMP && pointType != ECC_POINT_COMP_EVEN &&
@ -7734,6 +7768,8 @@ int wc_ecc_import_point_der_ex(const byte* in, word32 inLen,
mp_clear(point->z);
}
RESTORE_VECTOR_REGISTERS();
return err;
}
@ -8049,6 +8085,8 @@ int wc_ecc_is_point(ecc_point* ecp, mp_int* a, mp_int* b, mp_int* prime)
return err;
}
SAVE_VECTOR_REGISTERS(err = _svr_ret;);
/* compute y^2 */
if (err == MP_OKAY)
err = mp_sqr(ecp->y, t1);
@ -8116,6 +8154,9 @@ int wc_ecc_is_point(ecc_point* ecp, mp_int* a, mp_int* b, mp_int* prime)
mp_clear(t1);
mp_clear(t2);
RESTORE_VECTOR_REGISTERS();
#ifdef WOLFSSL_SMALL_STACK
XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
@ -10934,7 +10975,7 @@ int ecc_mul2add(ecc_point* A, mp_int* kA,
}
#endif /* HAVE_THREAD_LS */
SAVE_VECTOR_REGISTERS();
SAVE_VECTOR_REGISTERS(err = _svr_ret;);
/* find point */
idx1 = find_base(A);
@ -11789,7 +11830,7 @@ int wc_ecc_encrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
}
#endif
SAVE_VECTOR_REGISTERS();
SAVE_VECTOR_REGISTERS(ret = _svr_ret;);
#ifdef WOLFSSL_ECIES_ISO18033
XMEMCPY(sharedSecret, out - pubKeySz, pubKeySz);
@ -12064,7 +12105,7 @@ int wc_ecc_decrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
}
#endif
SAVE_VECTOR_REGISTERS();
SAVE_VECTOR_REGISTERS(ret = _svr_ret;);
#ifndef WOLFSSL_ECIES_OLD
if (pubKey == NULL) {
@ -12283,7 +12324,7 @@ int mp_jacobi(mp_int* a, mp_int* n, int* c)
return res;
}
SAVE_VECTOR_REGISTERS();
SAVE_VECTOR_REGISTERS(return _svr_ret;);
if ((res = mp_mod(a, n, &a1)) != MP_OKAY) {
goto done;

View File

@ -1490,14 +1490,17 @@ int wc_ValidateEccsiPair(EccsiKey* key, enum wc_HashType hashType,
err = BAD_STATE_E;
}
if (err == 0) {
params = &key->params;
if (err != 0)
return err;
hs = &key->tmp;
res = &key->pubkey.pubkey;
SAVE_VECTOR_REGISTERS(return _svr_ret;);
params = &key->params;
hs = &key->tmp;
res = &key->pubkey.pubkey;
err = eccsi_load_base(key);
err = eccsi_load_base(key);
}
if (err == 0) {
err = eccsi_load_ecc_params(key);
}
@ -1546,6 +1549,8 @@ int wc_ValidateEccsiPair(EccsiKey* key, enum wc_HashType hashType,
}
}
RESTORE_VECTOR_REGISTERS();
return err;
}
@ -2172,12 +2177,15 @@ int wc_VerifyEccsiHash(EccsiKey* key, enum wc_HashType hashType,
err = BAD_STATE_E;
}
if (err != 0)
return err;
SAVE_VECTOR_REGISTERS(return _svr_ret;);
/* Decode the signature into components. */
if (err == 0) {
r = &key->pubkey.k;
pvt = &key->pubkey.pubkey;
err = eccsi_decode_sig_r_pvt(key, sig, sigSz, r, pvt);
}
r = &key->pubkey.k;
pvt = &key->pubkey.pubkey;
err = eccsi_decode_sig_r_pvt(key, sig, sigSz, r, pvt);
/* Load the curve parameters for operations */
if (err == 0) {
@ -2236,6 +2244,8 @@ int wc_VerifyEccsiHash(EccsiKey* key, enum wc_HashType hashType,
*verified = ((err == 0) && (mp_cmp(jx, r) == MP_EQ));
}
RESTORE_VECTOR_REGISTERS();
return err;
}
#endif /* WOLFCRYPT_ECCSI_CLIENT */

View File

@ -1301,11 +1301,13 @@ int wc_GenerateSakkeRskTable(const SakkeKey* key, const ecc_point* rsk,
err = BAD_FUNC_ARG;
}
if (err == 0) {
SAVE_VECTOR_REGISTERS(return _svr_ret;);
#ifdef WOLFSSL_SP_1024
err = sp_Pairing_gen_precomp_1024(rsk, table, len);
#else
err = NOT_COMPILED_IN;
#endif
RESTORE_VECTOR_REGISTERS();
}
return err;
@ -2343,6 +2345,8 @@ int wc_ValidateSakkeRsk(SakkeKey* key, const byte* id, word16 idSz,
err = BAD_FUNC_ARG;
}
SAVE_VECTOR_REGISTERS(return _svr_ret;);
/* Load elliptic curve parameters */
if (err == 0) {
err = sakke_load_params(key);
@ -2378,6 +2382,8 @@ int wc_ValidateSakkeRsk(SakkeKey* key, const byte* id, word16 idSz,
*valid = ((err == 0) && (mp_cmp(a, &key->params.g) == MP_EQ));
}
RESTORE_VECTOR_REGISTERS();
return err;
}
@ -6239,6 +6245,8 @@ int wc_MakeSakkePointI(SakkeKey* key, const byte* id, word16 idSz)
err = BAD_FUNC_ARG;
}
SAVE_VECTOR_REGISTERS(return _svr_ret;);
if (err == 0) {
err = sakke_load_params(key);
}
@ -6251,6 +6259,8 @@ int wc_MakeSakkePointI(SakkeKey* key, const byte* id, word16 idSz)
key->i.idSz = idSz;
}
RESTORE_VECTOR_REGISTERS();
return err;
}
@ -6378,7 +6388,9 @@ int wc_GenerateSakkePointITable(SakkeKey* key, byte* table, word32* len)
#ifdef WOLFSSL_HAVE_SP_ECC
if (err == 0) {
SAVE_VECTOR_REGISTERS(return _svr_ret;);
err = sp_ecc_gen_table_1024(key->i.i, table, len, key->heap);
RESTORE_VECTOR_REGISTERS();
}
if (err == 0) {
key->i.table = table;
@ -6566,6 +6578,8 @@ int wc_MakeSakkeEncapsulatedSSV(SakkeKey* key, enum wc_HashType hashType,
err = BAD_STATE_E;
}
SAVE_VECTOR_REGISTERS(return _svr_ret;);
/* Load parameters */
if (err == 0) {
err = sakke_load_params(key);
@ -6634,6 +6648,8 @@ int wc_MakeSakkeEncapsulatedSSV(SakkeKey* key, enum wc_HashType hashType,
/* Step 6: Output SSV - already encoded in buffer */
RESTORE_VECTOR_REGISTERS();
return err;
}
@ -6739,6 +6755,8 @@ int wc_DeriveSakkeSSV(SakkeKey* key, enum wc_HashType hashType, byte* ssv,
err = BAD_STATE_E;
}
SAVE_VECTOR_REGISTERS(return _svr_ret;);
/* Load parameters */
if (err == 0) {
err = sakke_load_params(key);
@ -6803,6 +6821,8 @@ int wc_DeriveSakkeSSV(SakkeKey* key, enum wc_HashType hashType, byte* ssv,
err = SAKKE_VERIFY_FAIL_E;
}
RESTORE_VECTOR_REGISTERS();
return err;
}
#endif /* WOLFCRYPT_SAKKE_CLIENT */

View File

@ -8837,9 +8837,11 @@ int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
err = MP_VAL;
}
SAVE_VECTOR_REGISTERS(err = _svr_ret;);
if (err == MP_OKAY) {
err = sp_exptmod_ex(b, e, e->used, m, r);
}
RESTORE_VECTOR_REGISTERS();
return err;
}
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
@ -13254,6 +13256,8 @@ int sp_prime_is_prime(sp_int* a, int t, int* result)
haveRes = 1;
}
SAVE_VECTOR_REGISTERS(err = _svr_ret;);
if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) {
/* check against primes table */
for (i = 0; i < SP_PRIME_SIZE; i++) {
@ -13292,6 +13296,8 @@ int sp_prime_is_prime(sp_int* a, int t, int* result)
}
}
RESTORE_VECTOR_REGISTERS();
FREE_SP_INT(b, NULL);
return err;
}
@ -13333,6 +13339,8 @@ int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng)
haveRes = 1;
}
SAVE_VECTOR_REGISTERS(err = _svr_ret;);
if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) {
/* check against primes table */
for (i = 0; i < SP_PRIME_SIZE; i++) {
@ -13424,6 +13432,9 @@ int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng)
if (result != NULL) {
*result = ret;
}
RESTORE_VECTOR_REGISTERS();
return err;
}
#endif /* WOLFSSL_SP_PRIME_GEN */
@ -13468,6 +13479,8 @@ int sp_gcd(sp_int* a, sp_int* b, sp_int* r)
int used = (a->used >= b->used) ? a->used + 1 : b->used + 1;
DECL_SP_INT_ARRAY(d, used, 3);
SAVE_VECTOR_REGISTERS(err = _svr_ret;);
ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL);
if (err == MP_OKAY) {
u = d[0];
@ -13532,6 +13545,8 @@ int sp_gcd(sp_int* a, sp_int* b, sp_int* r)
}
FREE_SP_INT_ARRAY(d, NULL);
RESTORE_VECTOR_REGISTERS();
}
return err;
@ -13577,7 +13592,11 @@ int sp_lcm(sp_int* a, sp_int* b, sp_int* r)
sp_init_size(t[0], used);
sp_init_size(t[1], used);
err = sp_gcd(a, b, t[0]);
SAVE_VECTOR_REGISTERS(err = _svr_ret;);
if (err == MP_OKAY)
err = sp_gcd(a, b, t[0]);
if (err == MP_OKAY) {
if (_sp_cmp_abs(a, b) == MP_GT) {
err = sp_div(a, t[0], t[1], NULL);
@ -13592,6 +13611,8 @@ int sp_lcm(sp_int* a, sp_int* b, sp_int* r)
}
}
}
RESTORE_VECTOR_REGISTERS();
}
FREE_SP_INT_ARRAY(t, NULL);

View File

@ -260,6 +260,8 @@ static void sp_2048_mont_norm_16(sp_digit* r, const sp_digit* m)
{
XMEMSET(r, 0, sizeof(sp_digit) * 16);
ASSERT_SAVED_VECTOR_REGISTERS();
/* r = 2^n mod m */
sp_2048_sub_in_place_16(r, m);
}
@ -313,6 +315,7 @@ extern sp_digit div_2048_word_asm_16(sp_digit d1, sp_digit d0, sp_digit div);
static WC_INLINE sp_digit div_2048_word_16(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
#if _MSC_VER >= 1920
return _udiv128(d1, d0, div, NULL);
#else
@ -330,6 +333,7 @@ static WC_INLINE sp_digit div_2048_word_16(sp_digit d1, sp_digit d0,
static WC_INLINE sp_digit div_2048_word_16(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
register sp_digit r asm("rax");
__asm__ __volatile__ (
"divq %3"
@ -392,6 +396,8 @@ static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_dig
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
(void)m;
div = d[15];
@ -442,6 +448,7 @@ static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_dig
static WC_INLINE int sp_2048_mod_16(sp_digit* r, const sp_digit* a,
const sp_digit* m)
{
/* Macro is defined outside this view; presumably a debug check that a caller
 * has already saved the vector registers - TODO confirm. */
ASSERT_SAVED_VECTOR_REGISTERS();
/* Delegate to the divide routine with NULL as the third argument and r as the
 * last one; its int result is returned unchanged. Presumably this leaves
 * a mod m in r - confirm against sp_2048_div_16. */
return sp_2048_div_16(a, m, NULL, r);
}
@ -473,6 +480,8 @@ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e,
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (33 * 32) + 32, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -683,6 +692,8 @@ static int sp_2048_mod_exp_avx2_16(sp_digit* r, const sp_digit* a, const sp_digi
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (33 * 32) + 32, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -843,6 +854,8 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
{
XMEMSET(r, 0, sizeof(sp_digit) * 32);
ASSERT_SAVED_VECTOR_REGISTERS();
/* r = 2^n mod m */
sp_2048_sub_in_place_32(r, m);
}
@ -896,6 +909,7 @@ extern sp_digit div_2048_word_asm_32(sp_digit d1, sp_digit d0, sp_digit div);
static WC_INLINE sp_digit div_2048_word_32(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
#if _MSC_VER >= 1920
return _udiv128(d1, d0, div, NULL);
#else
@ -913,6 +927,7 @@ static WC_INLINE sp_digit div_2048_word_32(sp_digit d1, sp_digit d0,
static WC_INLINE sp_digit div_2048_word_32(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
register sp_digit r asm("rax");
__asm__ __volatile__ (
"divq %3"
@ -944,6 +959,8 @@ static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, s
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
(void)m;
div = d[31];
@ -998,6 +1015,7 @@ static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, s
static WC_INLINE int sp_2048_mod_32_cond(sp_digit* r, const sp_digit* a,
const sp_digit* m)
{
/* Macro is defined outside this view; presumably a debug check that a caller
 * has already saved the vector registers - TODO confirm. */
ASSERT_SAVED_VECTOR_REGISTERS();
/* Delegate to the _cond divide routine with NULL as the third argument and r
 * as the last one; its int result is returned unchanged. Presumably this
 * leaves a mod m in r - confirm against sp_2048_div_32_cond. */
return sp_2048_div_32_cond(a, m, NULL, r);
}
@ -1055,6 +1073,8 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
(void)m;
div = d[31];
@ -1106,6 +1126,7 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig
static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a,
const sp_digit* m)
{
/* Macro is defined outside this view; presumably a debug check that a caller
 * has already saved the vector registers - TODO confirm. */
ASSERT_SAVED_VECTOR_REGISTERS();
/* Delegate to the divide routine with NULL as the third argument and r as the
 * last one; its int result is returned unchanged. Presumably this leaves
 * a mod m in r - confirm against sp_2048_div_32. */
return sp_2048_div_32(a, m, NULL, r);
}
@ -1138,6 +1159,8 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (17 * 64) + 64, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -1332,6 +1355,8 @@ static int sp_2048_mod_exp_avx2_32(sp_digit* r, const sp_digit* a, const sp_digi
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (17 * 64) + 64, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -1492,6 +1517,8 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em,
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
if (*outLen < 256) {
err = MP_TO_E;
}
@ -1654,6 +1681,8 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm,
sp_digit* r;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
(void)pm;
(void)qm;
(void)dpm;
@ -1762,6 +1791,8 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm,
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
(void)dm;
(void)mm;
@ -1972,6 +2003,8 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod,
#endif
int expBits = mp_count_bits(exp);
ASSERT_SAVED_VECTOR_REGISTERS();
if (mp_count_bits(base) > 2048 || expBits > 2048 ||
mp_count_bits(mod) != 2048) {
err = MP_READ_E;
@ -2057,6 +2090,8 @@ static int sp_2048_mod_exp_2_avx2_32(sp_digit* r, const sp_digit* e, int bits,
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (33 + 64), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -2182,6 +2217,8 @@ static int sp_2048_mod_exp_2_32(sp_digit* r, const sp_digit* e, int bits,
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (33 + 64), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -2313,6 +2350,8 @@ int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen,
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
if (mp_count_bits(base) > 2048 || expLen > 256 ||
mp_count_bits(mod) != 2048) {
err = MP_READ_E;
@ -2415,6 +2454,8 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod,
#endif
int expBits = mp_count_bits(exp);
ASSERT_SAVED_VECTOR_REGISTERS();
if (mp_count_bits(base) > 1024 || expBits > 1024 ||
mp_count_bits(mod) != 1024) {
err = MP_READ_E;
@ -2684,6 +2725,8 @@ static void sp_3072_mont_norm_24(sp_digit* r, const sp_digit* m)
{
XMEMSET(r, 0, sizeof(sp_digit) * 24);
ASSERT_SAVED_VECTOR_REGISTERS();
/* r = 2^n mod m */
sp_3072_sub_in_place_24(r, m);
}
@ -2737,6 +2780,7 @@ extern sp_digit div_3072_word_asm_24(sp_digit d1, sp_digit d0, sp_digit div);
static WC_INLINE sp_digit div_3072_word_24(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
#if _MSC_VER >= 1920
return _udiv128(d1, d0, div, NULL);
#else
@ -2754,6 +2798,7 @@ static WC_INLINE sp_digit div_3072_word_24(sp_digit d1, sp_digit d0,
static WC_INLINE sp_digit div_3072_word_24(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
register sp_digit r asm("rax");
__asm__ __volatile__ (
"divq %3"
@ -2816,6 +2861,8 @@ static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_dig
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
(void)m;
div = d[23];
@ -2866,6 +2913,7 @@ static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_dig
static WC_INLINE int sp_3072_mod_24(sp_digit* r, const sp_digit* a,
const sp_digit* m)
{
/* Macro is defined outside this view; presumably a debug check that a caller
 * has already saved the vector registers - TODO confirm. */
ASSERT_SAVED_VECTOR_REGISTERS();
/* Delegate to the divide routine with NULL as the third argument and r as the
 * last one; its int result is returned unchanged. Presumably this leaves
 * a mod m in r - confirm against sp_3072_div_24. */
return sp_3072_div_24(a, m, NULL, r);
}
@ -2897,6 +2945,8 @@ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e,
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (33 * 48) + 48, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -3107,6 +3157,8 @@ static int sp_3072_mod_exp_avx2_24(sp_digit* r, const sp_digit* a, const sp_digi
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (33 * 48) + 48, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -3267,6 +3319,8 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
{
XMEMSET(r, 0, sizeof(sp_digit) * 48);
ASSERT_SAVED_VECTOR_REGISTERS();
/* r = 2^n mod m */
sp_3072_sub_in_place_48(r, m);
}
@ -3320,6 +3374,7 @@ extern sp_digit div_3072_word_asm_48(sp_digit d1, sp_digit d0, sp_digit div);
static WC_INLINE sp_digit div_3072_word_48(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
#if _MSC_VER >= 1920
return _udiv128(d1, d0, div, NULL);
#else
@ -3337,6 +3392,7 @@ static WC_INLINE sp_digit div_3072_word_48(sp_digit d1, sp_digit d0,
static WC_INLINE sp_digit div_3072_word_48(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
register sp_digit r asm("rax");
__asm__ __volatile__ (
"divq %3"
@ -3368,6 +3424,8 @@ static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, s
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
(void)m;
div = d[47];
@ -3422,6 +3480,7 @@ static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, s
static WC_INLINE int sp_3072_mod_48_cond(sp_digit* r, const sp_digit* a,
const sp_digit* m)
{
/* Macro is defined outside this view; presumably a debug check that a caller
 * has already saved the vector registers - TODO confirm. */
ASSERT_SAVED_VECTOR_REGISTERS();
/* Delegate to the _cond divide routine with NULL as the third argument and r
 * as the last one; its int result is returned unchanged. Presumably this
 * leaves a mod m in r - confirm against sp_3072_div_48_cond. */
return sp_3072_div_48_cond(a, m, NULL, r);
}
@ -3479,6 +3538,8 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
(void)m;
div = d[47];
@ -3530,6 +3591,7 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig
static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a,
const sp_digit* m)
{
/* Macro is defined outside this view; presumably a debug check that a caller
 * has already saved the vector registers - TODO confirm. */
ASSERT_SAVED_VECTOR_REGISTERS();
/* Delegate to the divide routine with NULL as the third argument and r as the
 * last one; its int result is returned unchanged. Presumably this leaves
 * a mod m in r - confirm against sp_3072_div_48. */
return sp_3072_div_48(a, m, NULL, r);
}
@ -3562,6 +3624,8 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (17 * 96) + 96, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -3756,6 +3820,8 @@ static int sp_3072_mod_exp_avx2_48(sp_digit* r, const sp_digit* a, const sp_digi
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (17 * 96) + 96, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -3916,6 +3982,8 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em,
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
if (*outLen < 384) {
err = MP_TO_E;
}
@ -4078,6 +4146,8 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm,
sp_digit* r;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
(void)pm;
(void)qm;
(void)dpm;
@ -4186,6 +4256,8 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm,
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
(void)dm;
(void)mm;
@ -4396,6 +4468,8 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod,
#endif
int expBits = mp_count_bits(exp);
ASSERT_SAVED_VECTOR_REGISTERS();
if (mp_count_bits(base) > 3072 || expBits > 3072 ||
mp_count_bits(mod) != 3072) {
err = MP_READ_E;
@ -4481,6 +4555,8 @@ static int sp_3072_mod_exp_2_avx2_48(sp_digit* r, const sp_digit* e, int bits,
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (49 + 96), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -4606,6 +4682,8 @@ static int sp_3072_mod_exp_2_48(sp_digit* r, const sp_digit* e, int bits,
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (49 + 96), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -4737,6 +4815,8 @@ int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen,
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
if (mp_count_bits(base) > 3072 || expLen > 384 ||
mp_count_bits(mod) != 3072) {
err = MP_READ_E;
@ -4839,6 +4919,8 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod,
#endif
int expBits = mp_count_bits(exp);
ASSERT_SAVED_VECTOR_REGISTERS();
if (mp_count_bits(base) > 1536 || expBits > 1536 ||
mp_count_bits(mod) != 1536) {
err = MP_READ_E;
@ -5089,6 +5171,8 @@ static void sp_4096_mont_norm_64(sp_digit* r, const sp_digit* m)
{
XMEMSET(r, 0, sizeof(sp_digit) * 64);
ASSERT_SAVED_VECTOR_REGISTERS();
/* r = 2^n mod m */
sp_4096_sub_in_place_64(r, m);
}
@ -5142,6 +5226,7 @@ extern sp_digit div_4096_word_asm_64(sp_digit d1, sp_digit d0, sp_digit div);
static WC_INLINE sp_digit div_4096_word_64(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
#if _MSC_VER >= 1920
return _udiv128(d1, d0, div, NULL);
#else
@ -5159,6 +5244,7 @@ static WC_INLINE sp_digit div_4096_word_64(sp_digit d1, sp_digit d0,
static WC_INLINE sp_digit div_4096_word_64(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
register sp_digit r asm("rax");
__asm__ __volatile__ (
"divq %3"
@ -5190,6 +5276,8 @@ static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, s
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
(void)m;
div = d[63];
@ -5244,6 +5332,7 @@ static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, s
static WC_INLINE int sp_4096_mod_64_cond(sp_digit* r, const sp_digit* a,
const sp_digit* m)
{
/* Macro is defined outside this view; presumably a debug check that a caller
 * has already saved the vector registers - TODO confirm. */
ASSERT_SAVED_VECTOR_REGISTERS();
/* Delegate to the _cond divide routine with NULL as the third argument and r
 * as the last one; its int result is returned unchanged. Presumably this
 * leaves a mod m in r - confirm against sp_4096_div_64_cond. */
return sp_4096_div_64_cond(a, m, NULL, r);
}
@ -5301,6 +5390,8 @@ static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_dig
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
(void)m;
div = d[63];
@ -5352,6 +5443,7 @@ static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_dig
static WC_INLINE int sp_4096_mod_64(sp_digit* r, const sp_digit* a,
const sp_digit* m)
{
/* Macro is defined outside this view; presumably a debug check that a caller
 * has already saved the vector registers - TODO confirm. */
ASSERT_SAVED_VECTOR_REGISTERS();
/* Delegate to the divide routine with NULL as the third argument and r as the
 * last one; its int result is returned unchanged. Presumably this leaves
 * a mod m in r - confirm against sp_4096_div_64. */
return sp_4096_div_64(a, m, NULL, r);
}
@ -5384,6 +5476,8 @@ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (17 * 128) + 128, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -5578,6 +5672,8 @@ static int sp_4096_mod_exp_avx2_64(sp_digit* r, const sp_digit* a, const sp_digi
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (17 * 128) + 128, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -5738,6 +5834,8 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em,
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
if (*outLen < 512) {
err = MP_TO_E;
}
@ -5900,6 +5998,8 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm,
sp_digit* r;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
(void)pm;
(void)qm;
(void)dpm;
@ -6008,6 +6108,8 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm,
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
(void)dm;
(void)mm;
@ -6218,6 +6320,8 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod,
#endif
int expBits = mp_count_bits(exp);
ASSERT_SAVED_VECTOR_REGISTERS();
if (mp_count_bits(base) > 4096 || expBits > 4096 ||
mp_count_bits(mod) != 4096) {
err = MP_READ_E;
@ -6303,6 +6407,8 @@ static int sp_4096_mod_exp_2_avx2_64(sp_digit* r, const sp_digit* e, int bits,
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (65 + 128), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -6428,6 +6534,8 @@ static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
byte y;
int err = MP_OKAY;
ASSERT_SAVED_VECTOR_REGISTERS();
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (65 + 128), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
@ -6559,6 +6667,8 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen,
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
if (mp_count_bits(base) > 4096 || expLen > 512 ||
mp_count_bits(mod) != 4096) {
err = MP_READ_E;
@ -6736,6 +6846,8 @@ static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit*
int64_t a32[8];
int64_t o;
ASSERT_SAVED_VECTOR_REGISTERS();
(void)m;
a32[0] = a[0] & 0xffffffff;
@ -23241,6 +23353,7 @@ extern sp_digit div_256_word_asm_4(sp_digit d1, sp_digit d0, sp_digit div);
static WC_INLINE sp_digit div_256_word_4(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
#if _MSC_VER >= 1920
return _udiv128(d1, d0, div, NULL);
#else
@ -23258,6 +23371,7 @@ static WC_INLINE sp_digit div_256_word_4(sp_digit d1, sp_digit d0,
static WC_INLINE sp_digit div_256_word_4(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
register sp_digit r asm("rax");
__asm__ __volatile__ (
"divq %3"
@ -23311,6 +23425,8 @@ static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
(void)m;
div = d[3];
@ -23361,6 +23477,7 @@ static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit
static WC_INLINE int sp_256_mod_4(sp_digit* r, const sp_digit* a,
    const sp_digit* m)
{
    int err;

    /* Debug-build check; the vector registers are expected to have been
     * saved further up the call chain. */
    ASSERT_SAVED_VECTOR_REGISTERS();

    /* Thin wrapper: forward a and m to the 256-bit division routine with a
     * NULL third argument and r as the last one, then hand its status back.
     * NOTE(review): r presumably receives the remainder - confirm against
     * sp_256_div_4. */
    err = sp_256_div_4(a, m, NULL, r);

    return err;
}
@ -23387,6 +23504,7 @@ static const uint64_t p256_order_low[2] = {
*/
static void sp_256_mont_mul_order_4(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
    /* Debug-build check; the vector registers are expected to have been
     * saved further up the call chain. */
    ASSERT_SAVED_VECTOR_REGISTERS();

    /* Step 1: multiply a by b, leaving the product in r. */
    sp_256_mul_4(r, a, b);

    /* Step 2: reduce r in place using p256_order and p256_mp_order. */
    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
}
@ -23398,6 +23516,7 @@ static void sp_256_mont_mul_order_4(sp_digit* r, const sp_digit* a, const sp_dig
*/
static void sp_256_mont_sqr_order_4(sp_digit* r, const sp_digit* a)
{
    /* Debug-build check; the vector registers are expected to have been
     * saved further up the call chain. */
    ASSERT_SAVED_VECTOR_REGISTERS();

    /* Step 1: square a, leaving the result in r. */
    sp_256_sqr_4(r, a);

    /* Step 2: reduce r in place using p256_order and p256_mp_order. */
    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
}
@ -23413,6 +23532,8 @@ static void sp_256_mont_sqr_n_order_4(sp_digit* r, const sp_digit* a, int n)
{
int i;
ASSERT_SAVED_VECTOR_REGISTERS();
sp_256_mont_sqr_order_4(r, a);
for (i=1; i<n; i++) {
sp_256_mont_sqr_order_4(r, r);
@ -23438,6 +23559,9 @@ static int sp_256_mont_inv_order_4_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const s
sp_digit* t)
{
int err = FP_WOULDBLOCK;
ASSERT_SAVED_VECTOR_REGISTERS();
sp_256_mont_inv_order_4_ctx* ctx = (sp_256_mont_inv_order_4_ctx*)sp_ctx;
typedef char ctx_size_test[sizeof(sp_256_mont_inv_order_4_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
@ -23497,6 +23621,8 @@ static void sp_256_mont_inv_order_4(sp_digit* r, const sp_digit* a,
sp_digit* t3 = td + 4 * 4;
int i;
ASSERT_SAVED_VECTOR_REGISTERS();
/* t = a^2 */
sp_256_mont_sqr_order_4(t, a);
/* t = a^3 = t * a */
@ -23584,6 +23710,8 @@ static void sp_256_mont_sqr_n_order_avx2_4(sp_digit* r, const sp_digit* a, int n
{
int i;
ASSERT_SAVED_VECTOR_REGISTERS();
sp_256_mont_sqr_order_avx2_4(r, a);
for (i=1; i<n; i++) {
sp_256_mont_sqr_order_avx2_4(r, r);
@ -23609,6 +23737,9 @@ static int sp_256_mont_inv_order_avx2_4_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, co
sp_digit* t)
{
int err = FP_WOULDBLOCK;
ASSERT_SAVED_VECTOR_REGISTERS();
sp_256_mont_inv_order_avx2_4_ctx* ctx = (sp_256_mont_inv_order_avx2_4_ctx*)sp_ctx;
typedef char ctx_size_test[sizeof(sp_256_mont_inv_order_avx2_4_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
@ -23668,6 +23799,8 @@ static void sp_256_mont_inv_order_avx2_4(sp_digit* r, const sp_digit* a,
sp_digit* t3 = td + 4 * 4;
int i;
ASSERT_SAVED_VECTOR_REGISTERS();
/* t = a^2 */
sp_256_mont_sqr_order_avx2_4(t, a);
/* t = a^3 = t * a */
@ -25786,6 +25919,8 @@ static void sp_384_mont_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
{
sp_digit o;
ASSERT_SAVED_VECTOR_REGISTERS();
o = sp_384_add_6(r, a, b);
sp_384_cond_sub_6(r, r, m, 0 - o);
}
@ -25801,6 +25936,8 @@ static void sp_384_mont_dbl_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
sp_digit o;
ASSERT_SAVED_VECTOR_REGISTERS();
o = sp_384_dbl_6(r, a);
sp_384_cond_sub_6(r, r, m, 0 - o);
}
@ -25815,6 +25952,8 @@ static void sp_384_mont_tpl_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
sp_digit o;
ASSERT_SAVED_VECTOR_REGISTERS();
o = sp_384_dbl_6(r, a);
sp_384_cond_sub_6(r, r, m, 0 - o);
o = sp_384_add_6(r, r, a);
@ -25834,6 +25973,8 @@ static void sp_384_mont_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
{
sp_digit o;
ASSERT_SAVED_VECTOR_REGISTERS();
o = sp_384_sub_6(r, a, b);
sp_384_cond_add_6(r, r, m, o);
}
@ -47801,6 +47942,7 @@ extern sp_digit div_384_word_asm_6(sp_digit d1, sp_digit d0, sp_digit div);
static WC_INLINE sp_digit div_384_word_6(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
#if _MSC_VER >= 1920
return _udiv128(d1, d0, div, NULL);
#else
@ -47818,6 +47960,7 @@ static WC_INLINE sp_digit div_384_word_6(sp_digit d1, sp_digit d0,
static WC_INLINE sp_digit div_384_word_6(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
register sp_digit r asm("rax");
__asm__ __volatile__ (
"divq %3"
@ -47873,6 +48016,8 @@ static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
(void)m;
div = d[5];
@ -47923,6 +48068,7 @@ static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit
static WC_INLINE int sp_384_mod_6(sp_digit* r, const sp_digit* a,
    const sp_digit* m)
{
    int err;

    /* Debug-build check; the vector registers are expected to have been
     * saved further up the call chain. */
    ASSERT_SAVED_VECTOR_REGISTERS();

    /* Thin wrapper: forward a and m to the 384-bit division routine with a
     * NULL third argument and r as the last one, then hand its status back.
     * NOTE(review): r presumably receives the remainder - confirm against
     * sp_384_div_6. */
    err = sp_384_div_6(a, m, NULL, r);

    return err;
}
@ -49939,6 +50085,7 @@ extern sp_digit div_1024_word_asm_16(sp_digit d1, sp_digit d0, sp_digit div);
static WC_INLINE sp_digit div_1024_word_16(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
#if _MSC_VER >= 1920
return _udiv128(d1, d0, div, NULL);
#else
@ -49956,6 +50103,7 @@ static WC_INLINE sp_digit div_1024_word_16(sp_digit d1, sp_digit d0,
static WC_INLINE sp_digit div_1024_word_16(sp_digit d1, sp_digit d0,
sp_digit div)
{
ASSERT_SAVED_VECTOR_REGISTERS();
register sp_digit r asm("rax");
__asm__ __volatile__ (
"divq %3"
@ -50018,6 +50166,8 @@ static WC_INLINE int sp_1024_div_16(const sp_digit* a, const sp_digit* d, sp_dig
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
(void)m;
div = d[15];
@ -50068,6 +50218,7 @@ static WC_INLINE int sp_1024_div_16(const sp_digit* a, const sp_digit* d, sp_dig
static WC_INLINE int sp_1024_mod_16(sp_digit* r, const sp_digit* a,
    const sp_digit* m)
{
    int err;

    /* Debug-build check; the vector registers are expected to have been
     * saved further up the call chain. */
    ASSERT_SAVED_VECTOR_REGISTERS();

    /* Thin wrapper: forward a and m to the 1024-bit division routine with a
     * NULL third argument and r as the last one, then hand its status back.
     * NOTE(review): r presumably receives the remainder - confirm against
     * sp_1024_div_16. */
    err = sp_1024_div_16(a, m, NULL, r);

    return err;
}
@ -59111,6 +59262,8 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res)
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
err = sp_ModExp_Fp_star_avx2_1024(base, exp, res);
@ -60719,6 +60872,8 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res)
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
err = sp_Pairing_avx2_1024(pm, qm, res);
@ -61850,6 +62005,8 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, word32* len)
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
err = sp_Pairing_gen_precomp_avx2_1024(pm, table, len);
@ -61884,6 +62041,8 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, mp_int* re
word32 cpuid_flags = cpuid_get_flags();
#endif
ASSERT_SAVED_VECTOR_REGISTERS();
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
err = sp_Pairing_precomp_avx2_1024(pm, qm, res, table, len);

View File

@ -1183,6 +1183,15 @@ decouple library dependencies with standard string, memory and so on.
WOLFSSL_API extern THREAD_LS_T int wc_svr_count;
WOLFSSL_API extern THREAD_LS_T const char *wc_svr_last_file;
WOLFSSL_API extern THREAD_LS_T int wc_svr_last_line;
/* Optional hard stop that the vector-register debug macros expand inside
 * their failure branches.  Each definition carries its own trailing
 * semicolon because the use sites add none; the default expands to nothing.
 * DEBUG_VECTOR_REGISTERS_ABORT_ON_FAIL takes precedence when both options
 * are defined.
 */
#if defined(DEBUG_VECTOR_REGISTERS_ABORT_ON_FAIL)
    /* Terminate via abort() when a check fails. */
    #define DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE abort();
#elif defined(DEBUG_VECTOR_REGISTERS_EXIT_ON_FAIL)
    /* Terminate via exit(1) when a check fails. */
    #define DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE exit(1);
#else
    /* Neither option set: nothing extra happens on failure. */
    #define DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE
#endif
#define SAVE_VECTOR_REGISTERS(...) { \
++wc_svr_count; \
if (wc_svr_count > 5) { \
@ -1193,6 +1202,7 @@ decouple library dependencies with standard string, memory and so on.
wc_svr_count, \
wc_svr_last_file, \
wc_svr_last_line); \
DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE \
} \
wc_svr_last_file = __FILE__; \
wc_svr_last_line = __LINE__; \
@ -1206,6 +1216,7 @@ decouple library dependencies with standard string, memory and so on.
wc_svr_count, \
wc_svr_last_file, \
wc_svr_last_line); \
DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE \
{ fail_clause } \
} \
}
@ -1218,6 +1229,7 @@ decouple library dependencies with standard string, memory and so on.
wc_svr_count, \
wc_svr_last_file, \
wc_svr_last_line); \
DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE \
{ fail_clause } \
} \
}
@ -1231,6 +1243,7 @@ decouple library dependencies with standard string, memory and so on.
wc_svr_count, \
wc_svr_last_file, \
wc_svr_last_line); \
DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE \
} \
wc_svr_last_file = __FILE__; \
wc_svr_last_line = __LINE__; \