Merge pull request #9097 from douzzer/20250812-atomic-cmpxchg

20250812-atomic-cmpxchg
Juliusz Sosinowicz
2025-08-15 01:14:45 +02:00
committed by GitHub
19 changed files with 656 additions and 231 deletions

@@ -21,7 +21,7 @@ jobs:
'--enable-intelasm --enable-sp-asm --enable-mlkem=yes,kyber,ml-kem CPPFLAGS="-DWOLFSSL_ML_KEM_USE_OLD_IDS"',
'--enable-intelasm --enable-sp-asm --enable-all --enable-testcert --enable-acert --enable-dtls13 --enable-dtls-mtu --enable-dtls-frag-ch --enable-dtlscid --enable-quic --with-sys-crypto-policy --enable-experimental --enable-kyber=yes,original --enable-lms --enable-xmss --enable-dilithium --enable-dual-alg-certs --disable-qt CPPFLAGS="-pedantic -Wdeclaration-after-statement -DWOLFCRYPT_TEST_LINT -DNO_WOLFSSL_CIPHER_SUITE_TEST -DTEST_LIBWOLFSSL_SOURCES_INCLUSION_SEQUENCE"',
'--enable-smallstack --enable-smallstackcache --enable-intelasm --enable-sp-asm --enable-all --enable-testcert --enable-acert --enable-dtls13 --enable-dtls-mtu --enable-dtls-frag-ch --enable-dtlscid --enable-quic --with-sys-crypto-policy --enable-experimental --enable-kyber=yes,original --enable-lms --enable-xmss --enable-dilithium --enable-dual-alg-certs --disable-qt CPPFLAGS="-pedantic -Wdeclaration-after-statement -DWOLFCRYPT_TEST_LINT -DNO_WOLFSSL_CIPHER_SUITE_TEST -DTEST_LIBWOLFSSL_SOURCES_INCLUSION_SEQUENCE"',
'--enable-intelasm --enable-sp-asm --enable-all --enable-testcert --enable-acert --enable-dtls13 --enable-dtls-mtu --enable-dtls-frag-ch --enable-dtlscid --enable-quic --with-sys-crypto-policy --enable-experimental --enable-kyber=yes,original --enable-lms --enable-xmss --enable-dilithium --enable-dual-alg-certs --disable-qt CPPFLAGS="-pedantic -Wdeclaration-after-statement -DWOLFCRYPT_TEST_LINT -DNO_WOLFSSL_CIPHER_SUITE_TEST -DTEST_LIBWOLFSSL_SOURCES_INCLUSION_SEQUENCE" CC=c++'
'--enable-intelasm --enable-sp-asm --enable-all --enable-testcert --enable-acert --enable-dtls13 --enable-dtls-mtu --enable-dtls-frag-ch --enable-dtlscid --enable-quic --with-sys-crypto-policy --enable-experimental --enable-kyber=yes,original --enable-lms --enable-xmss --enable-dilithium --enable-dual-alg-certs --disable-qt CPPFLAGS="-Wdeclaration-after-statement -DWOLFCRYPT_TEST_LINT -DNO_WOLFSSL_CIPHER_SUITE_TEST -DTEST_LIBWOLFSSL_SOURCES_INCLUSION_SEQUENCE" CC=c++'
]
name: make check
if: github.repository_owner == 'wolfssl'

@@ -877,6 +877,7 @@ WOLFSSL_TLSX_PQC_MLKEM_STORE_PRIV_KEY
WOLFSSL_TRACK_MEMORY_FULL
WOLFSSL_TRAP_MALLOC_SZ
WOLFSSL_UNALIGNED_64BIT_ACCESS
WOLFSSL_USER_DEFINED_ATOMICS
WOLFSSL_USER_FILESYSTEM
WOLFSSL_USER_LOG
WOLFSSL_USER_MUTEX

@@ -42088,10 +42088,10 @@ static int test_wolfSSL_dtls_bad_record(void)
!defined(NO_WOLFSSL_CLIENT) && !defined(NO_WOLFSSL_SERVER) && \
defined(HAVE_IO_TESTS_DEPENDENCIES)
static volatile int test_AEAD_seq_num = 0;
#ifdef WOLFSSL_ATOMIC_INITIALIZER
wolfSSL_Atomic_Int test_AEAD_done = WOLFSSL_ATOMIC_INITIALIZER(0);
#else
#ifdef WOLFSSL_NO_ATOMICS
static volatile int test_AEAD_done = 0;
#else
wolfSSL_Atomic_Int test_AEAD_done = WOLFSSL_ATOMIC_INITIALIZER(0);
#endif
#ifdef WOLFSSL_MUTEX_INITIALIZER
static wolfSSL_Mutex test_AEAD_mutex = WOLFSSL_MUTEX_INITIALIZER(test_AEAD_mutex);

@@ -624,11 +624,11 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
*/
static int checkedAESNI = 0;
static int haveAESNI = 0;
static word32 intel_flags = 0;
static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER;
static WARN_UNUSED_RESULT int Check_CPU_support_AES(void)
{
intel_flags = cpuid_get_flags();
cpuid_get_flags_ex(&intel_flags);
return IS_INTEL_AESNI(intel_flags) != 0;
}
@@ -786,15 +786,11 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
#define NEED_AES_TABLES
static int checkedCpuIdFlags = 0;
static word32 cpuid_flags = 0;
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
static void Check_CPU_support_HwCrypto(Aes* aes)
{
if (checkedCpuIdFlags == 0) {
cpuid_flags = cpuid_get_flags();
checkedCpuIdFlags = 1;
}
cpuid_get_flags_ex(&cpuid_flags);
aes->use_aes_hw_crypto = IS_AARCH64_AES(cpuid_flags);
#ifdef HAVE_AESGCM
aes->use_pmull_hw_crypto = IS_AARCH64_PMULL(cpuid_flags);

@@ -109,8 +109,7 @@ Public domain.
#define HAVE_INTEL_AVX2
#endif
static int cpuidFlagsSet = 0;
static word32 cpuidFlags = 0;
static cpuid_flags_t cpuidFlags = WC_CPUID_INITIALIZER;
#endif
/**
@@ -332,10 +331,7 @@ int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
return 0;
}
if (!cpuidFlagsSet) {
cpuidFlags = cpuid_get_flags();
cpuidFlagsSet = 1;
}
cpuid_get_flags_ex(&cpuidFlags);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_AVX2(cpuidFlags)) {

@@ -25,8 +25,7 @@
#if defined(HAVE_CPUID) || defined(HAVE_CPUID_INTEL) || \
defined(HAVE_CPUID_AARCH64)
static word32 cpuid_check = 0;
static word32 cpuid_flags = 0;
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
#endif
#ifdef HAVE_CPUID_INTEL
@@ -81,21 +80,22 @@
}
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
if (cpuid_flag(1, 0, ECX, 28)) { cpuid_flags |= CPUID_AVX1 ; }
if (cpuid_flag(7, 0, EBX, 5)) { cpuid_flags |= CPUID_AVX2 ; }
if (cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; }
if (cpuid_flag(1, 0, ECX, 30)) { cpuid_flags |= CPUID_RDRAND; }
if (cpuid_flag(7, 0, EBX, 18)) { cpuid_flags |= CPUID_RDSEED; }
if (cpuid_flag(1, 0, ECX, 25)) { cpuid_flags |= CPUID_AESNI ; }
if (cpuid_flag(7, 0, EBX, 19)) { cpuid_flags |= CPUID_ADX ; }
if (cpuid_flag(1, 0, ECX, 22)) { cpuid_flags |= CPUID_MOVBE ; }
if (cpuid_flag(7, 0, EBX, 3)) { cpuid_flags |= CPUID_BMI1 ; }
if (cpuid_flag(7, 0, EBX, 29)) { cpuid_flags |= CPUID_SHA ; }
cpuid_check = 1;
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
if (cpuid_flag(1, 0, ECX, 28)) { new_cpuid_flags |= CPUID_AVX1 ; }
if (cpuid_flag(7, 0, EBX, 5)) { new_cpuid_flags |= CPUID_AVX2 ; }
if (cpuid_flag(7, 0, EBX, 8)) { new_cpuid_flags |= CPUID_BMI2 ; }
if (cpuid_flag(1, 0, ECX, 30)) { new_cpuid_flags |= CPUID_RDRAND; }
if (cpuid_flag(7, 0, EBX, 18)) { new_cpuid_flags |= CPUID_RDSEED; }
if (cpuid_flag(1, 0, ECX, 25)) { new_cpuid_flags |= CPUID_AESNI ; }
if (cpuid_flag(7, 0, EBX, 19)) { new_cpuid_flags |= CPUID_ADX ; }
if (cpuid_flag(1, 0, ECX, 22)) { new_cpuid_flags |= CPUID_MOVBE ; }
if (cpuid_flag(7, 0, EBX, 3)) { new_cpuid_flags |= CPUID_BMI1 ; }
if (cpuid_flag(7, 0, EBX, 29)) { new_cpuid_flags |= CPUID_SHA ; }
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#elif defined(HAVE_CPUID_AARCH64)
@@ -113,9 +113,10 @@
/* https://developer.arm.com/documentation/ddi0601/2024-09/AArch64-Registers
* /ID-AA64ISAR0-EL1--AArch64-Instruction-Set-Attribute-Register-0 */
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
word64 features;
__asm__ __volatile (
@@ -126,25 +127,26 @@
);
if (features & CPUID_AARCH64_FEAT_AES)
cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_AES;
if (features & CPUID_AARCH64_FEAT_AES_PMULL) {
cpuid_flags |= CPUID_AES;
cpuid_flags |= CPUID_PMULL;
new_cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_PMULL;
}
if (features & CPUID_AARCH64_FEAT_SHA256)
cpuid_flags |= CPUID_SHA256;
new_cpuid_flags |= CPUID_SHA256;
if (features & CPUID_AARCH64_FEAT_SHA256_512)
cpuid_flags |= CPUID_SHA256 | CPUID_SHA512;
new_cpuid_flags |= CPUID_SHA256 | CPUID_SHA512;
if (features & CPUID_AARCH64_FEAT_RDM)
cpuid_flags |= CPUID_RDM;
new_cpuid_flags |= CPUID_RDM;
if (features & CPUID_AARCH64_FEAT_SHA3)
cpuid_flags |= CPUID_SHA3;
new_cpuid_flags |= CPUID_SHA3;
if (features & CPUID_AARCH64_FEAT_SM3)
cpuid_flags |= CPUID_SM3;
new_cpuid_flags |= CPUID_SM3;
if (features & CPUID_AARCH64_FEAT_SM4)
cpuid_flags |= CPUID_SM4;
new_cpuid_flags |= CPUID_SM4;
cpuid_check = 1;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#elif defined(__linux__)
@@ -154,42 +156,44 @@
#include <sys/auxv.h>
#include <asm/hwcap.h>
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
word64 hwcaps = getauxval(AT_HWCAP);
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
if (hwcaps & HWCAP_AES)
cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_AES;
if (hwcaps & HWCAP_PMULL)
cpuid_flags |= CPUID_PMULL;
new_cpuid_flags |= CPUID_PMULL;
if (hwcaps & HWCAP_SHA2)
cpuid_flags |= CPUID_SHA256;
new_cpuid_flags |= CPUID_SHA256;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
if (hwcaps & HWCAP_SHA512)
cpuid_flags |= CPUID_SHA512;
new_cpuid_flags |= CPUID_SHA512;
#endif
#if defined(HWCAP_ASIMDRDM) && !defined(WOLFSSL_AARCH64_NO_SQRDMLSH)
if (hwcaps & HWCAP_ASIMDRDM)
cpuid_flags |= CPUID_RDM;
new_cpuid_flags |= CPUID_RDM;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
if (hwcaps & HWCAP_SHA3)
cpuid_flags |= CPUID_SHA3;
new_cpuid_flags |= CPUID_SHA3;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SM3
if (hwcaps & HWCAP_SM3)
cpuid_flags |= CPUID_SM3;
new_cpuid_flags |= CPUID_SM3;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SM4
if (hwcaps & HWCAP_SM4)
cpuid_flags |= CPUID_SM4;
new_cpuid_flags |= CPUID_SM4;
#endif
(void)hwcaps;
cpuid_check = 1;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#elif defined(__ANDROID__) || defined(ANDROID)
@@ -198,19 +202,21 @@
#include "cpu-features.h"
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
word64 features = android_getCpuFeatures();
if (features & ANDROID_CPU_ARM_FEATURE_AES)
cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_AES;
if (features & ANDROID_CPU_ARM_FEATURE_PMULL)
cpuid_flags |= CPUID_PMULL;
new_cpuid_flags |= CPUID_PMULL;
if (features & ANDROID_CPU_ARM_FEATURE_SHA2)
cpuid_flags |= CPUID_SHA256;
new_cpuid_flags |= CPUID_SHA256;
cpuid_check = 1;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#elif defined(__APPLE__)
@@ -229,29 +235,31 @@
return ret;
}
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_AES") != 0)
cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_AES;
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_PMULL") != 0)
cpuid_flags |= CPUID_PMULL;
new_cpuid_flags |= CPUID_PMULL;
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA256") != 0)
cpuid_flags |= CPUID_SHA256;
new_cpuid_flags |= CPUID_SHA256;
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA512") != 0)
cpuid_flags |= CPUID_SHA512;
new_cpuid_flags |= CPUID_SHA512;
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_RDM") != 0)
cpuid_flags |= CPUID_RDM;
new_cpuid_flags |= CPUID_RDM;
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA3") != 0)
cpuid_flags |= CPUID_SHA3;
new_cpuid_flags |= CPUID_SHA3;
#ifdef WOLFSSL_ARMASM_CRYPTO_SM3
cpuid_flags |= CPUID_SM3;
new_cpuid_flags |= CPUID_SM3;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SM4
cpuid_flags |= CPUID_SM4;
new_cpuid_flags |= CPUID_SM4;
#endif
cpuid_check = 1;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#elif defined(__FreeBSD__) || defined(__OpenBSD__)
@@ -259,70 +267,75 @@
#include <sys/auxv.h>
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
word64 features = 0;
elf_aux_info(AT_HWCAP, &features, sizeof(features));
if (features & CPUID_AARCH64_FEAT_AES)
cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_AES;
if (features & CPUID_AARCH64_FEAT_AES_PMULL) {
cpuid_flags |= CPUID_AES;
cpuid_flags |= CPUID_PMULL;
new_cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_PMULL;
}
if (features & CPUID_AARCH64_FEAT_SHA256)
cpuid_flags |= CPUID_SHA256;
new_cpuid_flags |= CPUID_SHA256;
if (features & CPUID_AARCH64_FEAT_SHA256_512)
cpuid_flags |= CPUID_SHA256 | CPUID_SHA512;
new_cpuid_flags |= CPUID_SHA256 | CPUID_SHA512;
if (features & CPUID_AARCH64_FEAT_RDM)
cpuid_flags |= CPUID_RDM;
new_cpuid_flags |= CPUID_RDM;
if (features & CPUID_AARCH64_FEAT_SHA3)
cpuid_flags |= CPUID_SHA3;
new_cpuid_flags |= CPUID_SHA3;
if (features & CPUID_AARCH64_FEAT_SM3)
cpuid_flags |= CPUID_SM3;
new_cpuid_flags |= CPUID_SM3;
if (features & CPUID_AARCH64_FEAT_SM4)
cpuid_flags |= CPUID_SM4;
new_cpuid_flags |= CPUID_SM4;
cpuid_check = 1;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#else
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
cpuid_flags |= CPUID_AES;
cpuid_flags |= CPUID_PMULL;
cpuid_flags |= CPUID_SHA256;
new_cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_PMULL;
new_cpuid_flags |= CPUID_SHA256;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
cpuid_flags |= CPUID_SHA512;
new_cpuid_flags |= CPUID_SHA512;
#endif
#ifndef WOLFSSL_AARCH64_NO_SQRDMLSH
cpuid_flags |= CPUID_RDM;
new_cpuid_flags |= CPUID_RDM;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
cpuid_flags |= CPUID_SHA3;
new_cpuid_flags |= CPUID_SHA3;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SM3
cpuid_flags |= CPUID_SM3;
new_cpuid_flags |= CPUID_SM3;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SM4
cpuid_flags |= CPUID_SM4;
new_cpuid_flags |= CPUID_SM4;
#endif
cpuid_check = 1;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#endif
#elif defined(HAVE_CPUID)
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
cpuid_flags = 0;
cpuid_check = 1;
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#endif
@@ -331,24 +344,29 @@
word32 cpuid_get_flags(void)
{
if (!cpuid_check)
cpuid_set_flags();
return cpuid_flags;
cpuid_set_flags();
return WOLFSSL_ATOMIC_LOAD(cpuid_flags);
}
void cpuid_select_flags(word32 flags)
{
cpuid_flags = flags;
WOLFSSL_ATOMIC_STORE(cpuid_flags, flags);
}
void cpuid_set_flag(word32 flag)
{
cpuid_flags |= flag;
word32 current_flags = WOLFSSL_ATOMIC_LOAD(cpuid_flags);
while (! wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &current_flags, current_flags | flag))
WC_RELAX_LONG_LOOP();
}
void cpuid_clear_flag(word32 flag)
{
cpuid_flags &= ~flag;
word32 current_flags = WOLFSSL_ATOMIC_LOAD(cpuid_flags);
while (! wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &current_flags, current_flags & ~flag))
WC_RELAX_LONG_LOOP();
}
#endif /* HAVE_CPUID */
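
The rewritten cpuid_set_flags() variants above all share one shape: a sentinel value marks the flags word as uninitialized, the first thread to finish probing publishes its result with a single compare-exchange, and losing the race is harmless because every thread ends up reading the same published value. A minimal sketch of that shape, using only helpers introduced in this change (probe_hardware() is a hypothetical stand-in for the per-platform feature detection):

static cpuid_flags_t flags = WC_CPUID_INITIALIZER;

static word32 get_flags(void)
{
    if (WOLFSSL_ATOMIC_LOAD(flags) == WC_CPUID_UNINITED_VAL) {
        word32 expected = WC_CPUID_UNINITED_VAL;
        /* probe_hardware() is hypothetical. If another thread won the race,
         * CompareExchange fails and writes the winner's value into expected;
         * either way the flags word holds exactly one published result. */
        (void)wolfSSL_Atomic_Uint_CompareExchange(&flags, &expected,
                                                  probe_hardware());
    }
    return WOLFSSL_ATOMIC_LOAD(flags);
}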

@@ -169,7 +169,7 @@
#ifdef WOLFSSL_WC_DILITHIUM
#if defined(USE_INTEL_SPEEDUP)
static word32 cpuid_flags = 0;
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
#endif
#ifdef DEBUG_DILITHIUM
@@ -10623,7 +10623,7 @@ int wc_dilithium_init_ex(dilithium_key* key, void* heap, int devId)
}
#if defined(WOLFSSL_WC_DILITHIUM) && defined(USE_INTEL_SPEEDUP)
cpuid_flags = cpuid_get_flags();
cpuid_get_flags_ex(&cpuid_flags);
#endif
return ret;

@@ -83,8 +83,7 @@ and Daniel J. Bernstein
#endif
#ifdef USE_INTEL_POLY1305_SPEEDUP
static word32 intel_flags = 0;
static word32 cpu_flags_set = 0;
static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER;
#endif
#if defined(USE_INTEL_POLY1305_SPEEDUP) || defined(POLY130564)
@@ -513,10 +512,7 @@ int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz)
return BAD_FUNC_ARG;
#ifdef USE_INTEL_POLY1305_SPEEDUP
if (!cpu_flags_set) {
intel_flags = cpuid_get_flags();
cpu_flags_set = 1;
}
cpuid_get_flags_ex(&intel_flags);
SAVE_VECTOR_REGISTERS(return _svr_ret;);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_AVX2(intel_flags))

@@ -274,8 +274,7 @@ static void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data,
#endif
#if defined(__aarch64__) && !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
static word32 cpuid_flags = 0;
static int cpuid_flags_set = 0;
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
#endif
static int InitSha256(wc_Sha256* sha256)
@@ -1763,10 +1762,7 @@ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
#endif
#if defined(__aarch64__) && !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
}
cpuid_get_flags_ex(&cpuid_flags);
#endif
(void)devId;
@@ -2048,10 +2044,7 @@ int wc_Sha256HashBlock(wc_Sha256* sha256, const unsigned char* data,
sha224->heap = heap;
#if defined(__aarch64__) && !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
}
cpuid_get_flags_ex(&cpuid_flags);
#endif
(void)devId;

@@ -57,8 +57,7 @@
#endif
#if defined(__aarch64__) && defined(WOLFSSL_ARMASM_CRYPTO_SHA512)
static word32 cpuid_flags = 0;
static int cpuid_flags_set = 0;
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
#endif
#ifdef WOLFSSL_SHA512
@@ -198,10 +197,7 @@ static int InitSha512_Family(wc_Sha512* sha512, void* heap, int devId,
return ret;
#if defined(__aarch64__) && defined(WOLFSSL_ARMASM_CRYPTO_SHA512)
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
}
cpuid_get_flags_ex(&cpuid_flags);
#endif
(void)devId;
@@ -884,10 +880,7 @@ int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId)
#endif
#if defined(__aarch64__) && defined(WOLFSSL_ARMASM_CRYPTO_SHA512)
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
}
cpuid_get_flags_ex(&cpuid_flags);
#endif
(void)devId;

@@ -184,10 +184,10 @@ This library contains implementation for the random number generator.
#if defined(HAVE_INTEL_RDRAND) || defined(HAVE_INTEL_RDSEED) || \
defined(HAVE_AMD_RDSEED)
static word32 intel_flags = 0;
static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER;
static void wc_InitRng_IntelRD(void)
{
intel_flags = cpuid_get_flags();
cpuid_get_flags_ex(&intel_flags);
}
#if defined(HAVE_INTEL_RDSEED) || defined(HAVE_AMD_RDSEED)
static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz);

@@ -388,7 +388,7 @@ static int InitSha256(wc_Sha256* sha256)
} /* extern "C" */
#endif
static word32 intel_flags = 0;
static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER;
#if defined(WC_C_DYNAMIC_FALLBACK) && !defined(WC_NO_INTERNAL_FUNCTION_POINTERS)
#define WC_NO_INTERNAL_FUNCTION_POINTERS
@@ -425,8 +425,7 @@ static int InitSha256(wc_Sha256* sha256)
}
#endif
if (intel_flags == 0)
intel_flags = cpuid_get_flags();
cpuid_get_flags_ex(&intel_flags);
if (IS_INTEL_SHA(intel_flags)) {
#ifdef HAVE_INTEL_AVX1
@@ -601,7 +600,7 @@ static int InitSha256(wc_Sha256* sha256)
if (transform_check)
return;
intel_flags = cpuid_get_flags();
cpuid_get_flags_ex(&intel_flags);
if (IS_INTEL_SHA(intel_flags)) {
#ifdef HAVE_INTEL_AVX1

@@ -67,8 +67,7 @@
defined(WOLFSSL_ARMASM))
#include <wolfssl/wolfcrypt/cpuid.h>
word32 cpuid_flags;
int cpuid_flags_set = 0;
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
#ifdef WC_C_DYNAMIC_FALLBACK
#define SHA3_BLOCK (sha3->sha3_block)
#define SHA3_BLOCK_N (sha3->sha3_block_n)
@@ -612,17 +611,19 @@ static int InitSha3(wc_Sha3* sha3)
#endif
#ifdef USE_INTEL_SPEEDUP
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
#ifdef WC_C_DYNAMIC_FALLBACK
}
{
int cpuid_flags_were_updated = cpuid_get_flags_ex(&cpuid_flags);
#ifdef WC_C_DYNAMIC_FALLBACK
(void)cpuid_flags_were_updated;
if (! CAN_SAVE_VECTOR_REGISTERS()) {
SHA3_BLOCK = BlockSha3;
SHA3_BLOCK_N = NULL;
}
else
#else
if ((! cpuid_flags_were_updated) && (SHA3_BLOCK != NULL)) {
}
else
#endif
if (IS_INTEL_AVX2(cpuid_flags)) {
SHA3_BLOCK = sha3_block_avx2;
@@ -638,11 +639,13 @@ static int InitSha3(wc_Sha3* sha3)
}
}
#define SHA3_FUNC_PTR
#endif
#endif /* USE_INTEL_SPEEDUP */
#if defined(__aarch64__) && defined(WOLFSSL_ARMASM)
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
{
int cpuid_flags_were_updated = cpuid_get_flags_ex(&cpuid_flags);
if ((! cpuid_flags_were_updated) && (SHA3_BLOCK != NULL)) {
}
else
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
if (IS_AARCH64_SHA3(cpuid_flags)) {
SHA3_BLOCK = BlockSha3_crypto;

@@ -544,7 +544,7 @@ static int InitSha512_256(wc_Sha512* sha512)
} /* extern "C" */
#endif
static word32 intel_flags = 0;
static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER;
#if defined(WC_C_DYNAMIC_FALLBACK) && !defined(WC_NO_INTERNAL_FUNCTION_POINTERS)
#define WC_NO_INTERNAL_FUNCTION_POINTERS
@@ -582,8 +582,7 @@ static int InitSha512_256(wc_Sha512* sha512)
}
#endif
if (intel_flags == 0)
intel_flags = cpuid_get_flags();
cpuid_get_flags_ex(&intel_flags);
#if defined(HAVE_INTEL_AVX2)
if (IS_INTEL_AVX2(intel_flags)) {
@@ -724,7 +723,7 @@ static int InitSha512_256(wc_Sha512* sha512)
if (transform_check)
return;
intel_flags = cpuid_get_flags();
cpuid_get_flags_ex(&intel_flags);
#if defined(HAVE_INTEL_AVX2)
if (IS_INTEL_AVX2(intel_flags)) {

@@ -103,7 +103,7 @@ extern volatile sword16 mlkem_opt_blocker;
#if defined(USE_INTEL_SPEEDUP) || (defined(__aarch64__) && \
defined(WOLFSSL_ARMASM))
static word32 cpuid_flags = 0;
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
#endif
/* Half of Q plus one. Converted message bit value of 1. */
@@ -1243,7 +1243,7 @@ void mlkem_init(void)
{
#if defined(USE_INTEL_SPEEDUP) || (defined(__aarch64__) && \
defined(WOLFSSL_ARMASM))
cpuid_flags = cpuid_get_flags();
cpuid_get_flags_ex(&cpuid_flags);
#endif
}

@@ -1282,6 +1282,11 @@ void wolfSSL_Atomic_Int_Init(wolfSSL_Atomic_Int* c, int i)
*c = i;
}
void wolfSSL_Atomic_Uint_Init(wolfSSL_Atomic_Uint* c, unsigned int i)
{
*c = i;
}
int wolfSSL_Atomic_Int_FetchAdd(wolfSSL_Atomic_Int* c, int i)
{
return __atomic_fetch_add(c, i, __ATOMIC_RELAXED);
@@ -1291,13 +1296,80 @@ int wolfSSL_Atomic_Int_FetchSub(wolfSSL_Atomic_Int* c, int i)
{
return __atomic_fetch_sub(c, i, __ATOMIC_RELAXED);
}
int wolfSSL_Atomic_Int_AddFetch(wolfSSL_Atomic_Int* c, int i)
{
return __atomic_add_fetch(c, i, __ATOMIC_RELAXED);
}
int wolfSSL_Atomic_Int_SubFetch(wolfSSL_Atomic_Int* c, int i)
{
return __atomic_sub_fetch(c, i, __ATOMIC_RELAXED);
}
int wolfSSL_Atomic_Int_CompareExchange(wolfSSL_Atomic_Int* c, int *expected_i,
int new_i)
{
/* For the success path, use full synchronization with barriers --
* "Sequentially-consistent ordering" -- so that all threads see the same
* "single total modification order of all atomic operations" -- but on
* failure we just need to be sure we acquire the value that changed out
* from under us.
*/
return __atomic_compare_exchange_n(c, expected_i, new_i, 0 /* weak */,
__ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
}
unsigned int wolfSSL_Atomic_Uint_FetchAdd(wolfSSL_Atomic_Uint* c,
unsigned int i)
{
return __atomic_fetch_add(c, i, __ATOMIC_RELAXED);
}
unsigned int wolfSSL_Atomic_Uint_FetchSub(wolfSSL_Atomic_Uint* c,
unsigned int i)
{
return __atomic_fetch_sub(c, i, __ATOMIC_RELAXED);
}
unsigned int wolfSSL_Atomic_Uint_AddFetch(wolfSSL_Atomic_Uint* c,
unsigned int i)
{
return __atomic_add_fetch(c, i, __ATOMIC_RELAXED);
}
unsigned int wolfSSL_Atomic_Uint_SubFetch(wolfSSL_Atomic_Uint* c,
unsigned int i)
{
return __atomic_sub_fetch(c, i, __ATOMIC_RELAXED);
}
int wolfSSL_Atomic_Uint_CompareExchange(
wolfSSL_Atomic_Uint* c, unsigned int *expected_i, unsigned int new_i)
{
/* For the success path, use full synchronization with barriers --
* "Sequentially-consistent ordering" -- so that all threads see the same
* "single total modification order of all atomic operations" -- but on
* failure we just need to be sure we acquire the value that changed out
* from under us.
*/
return __atomic_compare_exchange_n(
c, expected_i, new_i, 0 /* weak */, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
}
#else
/* Default C Implementation */
void wolfSSL_Atomic_Int_Init(wolfSSL_Atomic_Int* c, int i)
{
atomic_init(c, i);
}
void wolfSSL_Atomic_Uint_Init(wolfSSL_Atomic_Uint* c, unsigned int i)
{
atomic_init(c, i);
}
int wolfSSL_Atomic_Int_FetchAdd(wolfSSL_Atomic_Int* c, int i)
{
return atomic_fetch_add_explicit(c, i, memory_order_relaxed);
@@ -1307,16 +1379,85 @@ int wolfSSL_Atomic_Int_FetchSub(wolfSSL_Atomic_Int* c, int i)
{
return atomic_fetch_sub_explicit(c, i, memory_order_relaxed);
}
int wolfSSL_Atomic_Int_AddFetch(wolfSSL_Atomic_Int* c, int i)
{
int ret = atomic_fetch_add_explicit(c, i, memory_order_relaxed);
return ret + i;
}
int wolfSSL_Atomic_Int_SubFetch(wolfSSL_Atomic_Int* c, int i)
{
int ret = atomic_fetch_sub_explicit(c, i, memory_order_relaxed);
return ret - i;
}
int wolfSSL_Atomic_Int_CompareExchange(
wolfSSL_Atomic_Int* c, int *expected_i, int new_i)
{
/* For the success path, use full synchronization with barriers --
* "Sequentially-consistent ordering" -- so that all threads see the same
* "single total modification order of all atomic operations" -- but on
* failure we just need to be sure we acquire the value that changed out
* from under us.
*/
return atomic_compare_exchange_strong_explicit(
c, expected_i, new_i, memory_order_seq_cst, memory_order_acquire);
}
unsigned int wolfSSL_Atomic_Uint_FetchAdd(wolfSSL_Atomic_Uint* c,
unsigned int i)
{
return atomic_fetch_add_explicit(c, i, memory_order_relaxed);
}
unsigned int wolfSSL_Atomic_Uint_FetchSub(wolfSSL_Atomic_Uint* c,
unsigned int i)
{
return atomic_fetch_sub_explicit(c, i, memory_order_relaxed);
}
unsigned int wolfSSL_Atomic_Uint_AddFetch(wolfSSL_Atomic_Uint* c,
unsigned int i)
{
unsigned int ret = atomic_fetch_add_explicit(c, i, memory_order_relaxed);
return ret + i;
}
unsigned int wolfSSL_Atomic_Uint_SubFetch(wolfSSL_Atomic_Uint* c,
unsigned int i)
{
unsigned int ret = atomic_fetch_sub_explicit(c, i, memory_order_relaxed);
return ret - i;
}
int wolfSSL_Atomic_Uint_CompareExchange(
wolfSSL_Atomic_Uint* c, unsigned int *expected_i, unsigned int new_i)
{
/* For the success path, use full synchronization with barriers --
* "Sequentially-consistent ordering" -- so that all threads see the same
* "single total modification order of all atomic operations" -- but on
* failure we just need to be sure we acquire the value that changed out
* from under us.
*/
return atomic_compare_exchange_strong_explicit(
c, expected_i, new_i, memory_order_seq_cst, memory_order_acquire);
}
#endif /* __cplusplus */
#elif defined(_MSC_VER)
/* Default C Implementation */
void wolfSSL_Atomic_Int_Init(wolfSSL_Atomic_Int* c, int i)
{
*c = i;
}
void wolfSSL_Atomic_Uint_Init(wolfSSL_Atomic_Uint* c, unsigned int i)
{
*c = i;
}
int wolfSSL_Atomic_Int_FetchAdd(wolfSSL_Atomic_Int* c, int i)
{
return (int)_InterlockedExchangeAdd(c, (long)i);
@@ -1327,6 +1468,76 @@ int wolfSSL_Atomic_Int_FetchSub(wolfSSL_Atomic_Int* c, int i)
return (int)_InterlockedExchangeAdd(c, (long)-i);
}
int wolfSSL_Atomic_Int_AddFetch(wolfSSL_Atomic_Int* c, int i)
{
int ret = (int)_InterlockedExchangeAdd(c, (long)i);
return ret + i;
}
int wolfSSL_Atomic_Int_SubFetch(wolfSSL_Atomic_Int* c, int i)
{
int ret = (int)_InterlockedExchangeAdd(c, (long)-i);
return ret - i;
}
int wolfSSL_Atomic_Int_CompareExchange(wolfSSL_Atomic_Int* c, int *expected_i,
int new_i)
{
long actual_i = InterlockedCompareExchange(c, (long)new_i,
(long)*expected_i);
if (actual_i == (long)*expected_i) {
return 1;
}
else {
*expected_i = (int)actual_i;
return 0;
}
}
unsigned int wolfSSL_Atomic_Uint_FetchAdd(wolfSSL_Atomic_Uint* c,
unsigned int i)
{
return (unsigned int)_InterlockedExchangeAdd((wolfSSL_Atomic_Int *)c,
(long)i);
}
unsigned int wolfSSL_Atomic_Uint_FetchSub(wolfSSL_Atomic_Uint* c,
unsigned int i)
{
return (unsigned int)_InterlockedExchangeAdd((wolfSSL_Atomic_Int *)c,
-(long)i);
}
unsigned int wolfSSL_Atomic_Uint_AddFetch(wolfSSL_Atomic_Uint* c,
unsigned int i)
{
unsigned int ret = (unsigned int)_InterlockedExchangeAdd
((wolfSSL_Atomic_Int *)c, (long)i);
return ret + i;
}
unsigned int wolfSSL_Atomic_Uint_SubFetch(wolfSSL_Atomic_Uint* c,
unsigned int i)
{
unsigned int ret = (unsigned int)_InterlockedExchangeAdd
((wolfSSL_Atomic_Int *)c, -(long)i);
return ret - i;
}
int wolfSSL_Atomic_Uint_CompareExchange(
wolfSSL_Atomic_Uint* c, unsigned int *expected_i, unsigned int new_i)
{
long actual_i = InterlockedCompareExchange
((wolfSSL_Atomic_Int *)c, (long)new_i, (long)*expected_i);
if (actual_i == (long)*expected_i) {
return 1;
}
else {
*expected_i = (unsigned int)actual_i;
return 0;
}
}
#endif
#endif /* WOLFSSL_ATOMIC_OPS */
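
The new AddFetch/SubFetch pairs complement the existing FetchAdd/FetchSub: the Fetch-first forms return the counter value before the operation, the Fetch-last forms the value after. A hedged illustration, not part of this diff, of why the post-op form is convenient for reference counting, using only functions added in this change (the object names are illustrative):

static wolfSSL_Atomic_Int refcount = WOLFSSL_ATOMIC_INITIALIZER(1);

static void obj_hold(void)
{
    (void)wolfSSL_Atomic_Int_AddFetch(&refcount, 1);
}

/* Returns 1 when the last reference has been dropped. */
static int obj_release(void)
{
    /* SubFetch returns the post-decrement value, so the zero test needs no
     * second read of the counter. */
    return wolfSSL_Atomic_Int_SubFetch(&refcount, 1) == 0;
}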
@@ -1395,7 +1606,8 @@ void wolfSSL_RefWithMutexDec(wolfSSL_RefWithMutex* ref, int* isZero, int* err)
#if WOLFSSL_CRYPT_HW_MUTEX
/* Mutex for protection of cryptography hardware */
static wolfSSL_Mutex wcCryptHwMutex WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwMutex);
static wolfSSL_Mutex wcCryptHwMutex
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwMutex);
#ifndef WOLFSSL_MUTEX_INITIALIZER
static int wcCryptHwMutexInit = 0;
#endif
@@ -1437,20 +1649,20 @@ int wolfSSL_CryptHwMutexUnLock(void)
#if WOLFSSL_CRYPT_HW_MUTEX && defined(WOLFSSL_ALGO_HW_MUTEX)
/* Mutex for protection of cryptography hardware */
#ifndef NO_RNG_MUTEX
static wolfSSL_Mutex wcCryptHwRngMutex \
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwRngMutex);
static wolfSSL_Mutex wcCryptHwRngMutex
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwRngMutex);
#endif /* NO_RNG_MUTEX */
#ifndef NO_AES_MUTEX
static wolfSSL_Mutex wcCryptHwAesMutex \
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwAesMutex);
static wolfSSL_Mutex wcCryptHwAesMutex
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwAesMutex);
#endif /* NO_AES_MUTEX */
#ifndef NO_HASH_MUTEX
static wolfSSL_Mutex wcCryptHwHashMutex \
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwHashMutex);
static wolfSSL_Mutex wcCryptHwHashMutex
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwHashMutex);
#endif /* NO_HASH_MUTEX */
#ifndef NO_PK_MUTEX
static wolfSSL_Mutex wcCryptHwPkMutex \
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwPkMutex);
static wolfSSL_Mutex wcCryptHwPkMutex
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwPkMutex);
#endif /* NO_PK_MUTEX */
#ifndef WOLFSSL_MUTEX_INITIALIZER

@@ -18892,6 +18892,91 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t memory_test(void)
if (const_byte_ptr_test(const_byte_array, &j) != CBPTR_EXPECTED) {
ret = 1;
}
if (ret != 0)
return WC_TEST_RET_ENC_NC;
}
{
#ifdef WOLFSSL_NO_ATOMICS
int a_int = WOLFSSL_ATOMIC_INITIALIZER(-2);
unsigned int a_uint = WOLFSSL_ATOMIC_INITIALIZER(2);
#else
wolfSSL_Atomic_Int a_int = WOLFSSL_ATOMIC_INITIALIZER(-2);
wolfSSL_Atomic_Uint a_uint = WOLFSSL_ATOMIC_INITIALIZER(2);
#endif
int int_expected;
unsigned int uint_expected;
if (WOLFSSL_ATOMIC_LOAD(a_int) != -2)
return WC_TEST_RET_ENC_NC;
if (WOLFSSL_ATOMIC_LOAD(a_uint) != 2)
return WC_TEST_RET_ENC_NC;
wolfSSL_Atomic_Int_Init(&a_int, -3);
if (WOLFSSL_ATOMIC_LOAD(a_int) != -3)
return WC_TEST_RET_ENC_NC;
wolfSSL_Atomic_Uint_Init(&a_uint, 3);
if (WOLFSSL_ATOMIC_LOAD(a_uint) != 3)
return WC_TEST_RET_ENC_NC;
WOLFSSL_ATOMIC_STORE(a_int, -4);
if (WOLFSSL_ATOMIC_LOAD(a_int) != -4)
return WC_TEST_RET_ENC_NC;
WOLFSSL_ATOMIC_STORE(a_uint, 4);
if (WOLFSSL_ATOMIC_LOAD(a_uint) != 4)
return WC_TEST_RET_ENC_NC;
if (wolfSSL_Atomic_Int_FetchAdd(&a_int, 2) != -4)
return WC_TEST_RET_ENC_NC;
if (WOLFSSL_ATOMIC_LOAD(a_int) != -2)
return WC_TEST_RET_ENC_NC;
if (wolfSSL_Atomic_Uint_FetchAdd(&a_uint, 2) != 4)
return WC_TEST_RET_ENC_NC;
if (WOLFSSL_ATOMIC_LOAD(a_uint) != 6)
return WC_TEST_RET_ENC_NC;
if (wolfSSL_Atomic_Int_FetchSub(&a_int, 2) != -2)
return WC_TEST_RET_ENC_NC;
if (WOLFSSL_ATOMIC_LOAD(a_int) != -4)
return WC_TEST_RET_ENC_NC;
if (wolfSSL_Atomic_Uint_FetchSub(&a_uint, 2) != 6)
return WC_TEST_RET_ENC_NC;
if (WOLFSSL_ATOMIC_LOAD(a_uint) != 4)
return WC_TEST_RET_ENC_NC;
if (wolfSSL_Atomic_Int_AddFetch(&a_int, 2) != -2)
return WC_TEST_RET_ENC_NC;
if (WOLFSSL_ATOMIC_LOAD(a_int) != -2)
return WC_TEST_RET_ENC_NC;
if (wolfSSL_Atomic_Uint_AddFetch(&a_uint, 2) != 6)
return WC_TEST_RET_ENC_NC;
if (WOLFSSL_ATOMIC_LOAD(a_uint) != 6)
return WC_TEST_RET_ENC_NC;
if (wolfSSL_Atomic_Int_SubFetch(&a_int, 2) != -4)
return WC_TEST_RET_ENC_NC;
if (WOLFSSL_ATOMIC_LOAD(a_int) != -4)
return WC_TEST_RET_ENC_NC;
if (wolfSSL_Atomic_Uint_SubFetch(&a_uint, 2) != 4)
return WC_TEST_RET_ENC_NC;
if (WOLFSSL_ATOMIC_LOAD(a_uint) != 4)
return WC_TEST_RET_ENC_NC;
int_expected = -5;
if (wolfSSL_Atomic_Int_CompareExchange(&a_int, &int_expected, -7))
return WC_TEST_RET_ENC_NC;
if (int_expected != -4)
return WC_TEST_RET_ENC_NC;
if (! wolfSSL_Atomic_Int_CompareExchange(&a_int, &int_expected, -7))
return WC_TEST_RET_ENC_NC;
if (WOLFSSL_ATOMIC_LOAD(a_int) != -7)
return WC_TEST_RET_ENC_NC;
uint_expected = 5;
if (wolfSSL_Atomic_Uint_CompareExchange(&a_uint, &uint_expected, 7))
return WC_TEST_RET_ENC_NC;
if (uint_expected != 4)
return WC_TEST_RET_ENC_NC;
if (! wolfSSL_Atomic_Uint_CompareExchange(&a_uint, &uint_expected, 7))
return WC_TEST_RET_ENC_NC;
if (WOLFSSL_ATOMIC_LOAD(a_uint) != 7)
return WC_TEST_RET_ENC_NC;
}
return ret;

@@ -44,6 +44,16 @@
#define HAVE_CPUID_AARCH64
#endif
#define WC_CPUID_UNINITED_VAL 0xffffffffU
#if !defined(WOLFSSL_NO_ATOMICS) && !defined(SINGLE_THREADED)
typedef wolfSSL_Atomic_Uint cpuid_flags_t;
#define WC_CPUID_INITIALIZER \
WOLFSSL_ATOMIC_INITIALIZER(WC_CPUID_UNINITED_VAL)
#else
typedef word32 cpuid_flags_t;
#define WC_CPUID_INITIALIZER WC_CPUID_UNINITED_VAL
#endif
#ifdef HAVE_CPUID_INTEL
#define CPUID_AVX1 0x0001
@@ -57,16 +67,16 @@
#define CPUID_BMI1 0x0100 /* ANDN */
#define CPUID_SHA 0x0200 /* SHA-1 and SHA-256 instructions */
#define IS_INTEL_AVX1(f) ((f) & CPUID_AVX1)
#define IS_INTEL_AVX2(f) ((f) & CPUID_AVX2)
#define IS_INTEL_RDRAND(f) ((f) & CPUID_RDRAND)
#define IS_INTEL_RDSEED(f) ((f) & CPUID_RDSEED)
#define IS_INTEL_BMI2(f) ((f) & CPUID_BMI2)
#define IS_INTEL_AESNI(f) ((f) & CPUID_AESNI)
#define IS_INTEL_ADX(f) ((f) & CPUID_ADX)
#define IS_INTEL_MOVBE(f) ((f) & CPUID_MOVBE)
#define IS_INTEL_BMI1(f) ((f) & CPUID_BMI1)
#define IS_INTEL_SHA(f) ((f) & CPUID_SHA)
#define IS_INTEL_AVX1(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_AVX1)
#define IS_INTEL_AVX2(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_AVX2)
#define IS_INTEL_RDRAND(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_RDRAND)
#define IS_INTEL_RDSEED(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_RDSEED)
#define IS_INTEL_BMI2(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_BMI2)
#define IS_INTEL_AESNI(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_AESNI)
#define IS_INTEL_ADX(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_ADX)
#define IS_INTEL_MOVBE(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_MOVBE)
#define IS_INTEL_BMI1(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_BMI1)
#define IS_INTEL_SHA(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_SHA)
#elif defined(HAVE_CPUID_AARCH64)
@@ -80,22 +90,31 @@
#define CPUID_SM4 0x0080 /* SM4 enc/dec */
#define CPUID_SB 0x0100 /* Speculation barrier */
#define IS_AARCH64_AES(f) ((f) & CPUID_AES)
#define IS_AARCH64_PMULL(f) ((f) & CPUID_PMULL)
#define IS_AARCH64_SHA256(f) ((f) & CPUID_SHA256)
#define IS_AARCH64_SHA512(f) ((f) & CPUID_SHA512)
#define IS_AARCH64_RDM(f) ((f) & CPUID_RDM)
#define IS_AARCH64_SHA3(f) ((f) & CPUID_SHA3)
#define IS_AARCH64_SM3(f) ((f) & CPUID_SM3)
#define IS_AARCH64_SM4(f) ((f) & CPUID_SM4)
#define IS_AARCH64_SB(f) ((f) & CPUID_SB)
#define IS_AARCH64_AES(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_AES)
#define IS_AARCH64_PMULL(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_PMULL)
#define IS_AARCH64_SHA256(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_SHA256)
#define IS_AARCH64_SHA512(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_SHA512)
#define IS_AARCH64_RDM(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_RDM)
#define IS_AARCH64_SHA3(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_SHA3)
#define IS_AARCH64_SM3(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_SM3)
#define IS_AARCH64_SM4(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_SM4)
#define IS_AARCH64_SB(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_SB)
#endif
#ifdef HAVE_CPUID
void cpuid_set_flags(void);
word32 cpuid_get_flags(void);
static WC_INLINE int cpuid_get_flags_ex(cpuid_flags_t *flags) {
if (WOLFSSL_ATOMIC_LOAD(*flags) == WC_CPUID_UNINITED_VAL) {
word32 old_cpuid_flags = WC_CPUID_UNINITED_VAL;
return wolfSSL_Atomic_Uint_CompareExchange
(flags, &old_cpuid_flags, cpuid_get_flags());
}
else
return 0;
}
/* Public APIs to modify flags. */
WOLFSSL_API void cpuid_select_flags(word32 flags);
WOLFSSL_API void cpuid_set_flag(word32 flag);
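
For callers, the .c hunks above collapse the old "check a set-flag, call cpuid_get_flags(), mark the flag" dance into one call. A minimal sketch of the resulting per-module pattern, with an illustrative module-local cache name:

static cpuid_flags_t my_cpuid_flags = WC_CPUID_INITIALIZER;

static int my_have_aesni(void)
{
    /* The first caller publishes the probed flags via the compare-exchange
     * inside cpuid_get_flags_ex(); every later call is just an atomic load. */
    (void)cpuid_get_flags_ex(&my_cpuid_flags);
    return IS_INTEL_AESNI(my_cpuid_flags) != 0;
}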

@@ -466,62 +466,123 @@
#endif
#ifndef WOLFSSL_NO_ATOMICS
#ifdef SINGLE_THREADED
typedef int wolfSSL_Atomic_Int;
#define WOLFSSL_ATOMIC_INITIALIZER(x) (x)
#define WOLFSSL_ATOMIC_LOAD(x) (x)
#define WOLFSSL_ATOMIC_STORE(x, val) (x) = (val)
#define WOLFSSL_ATOMIC_OPS
#elif defined(HAVE_C___ATOMIC)
#ifdef __cplusplus
#if defined(__GNUC__) && defined(__ATOMIC_RELAXED)
/* C++ using direct calls to compiler built-in functions */
typedef volatile int wolfSSL_Atomic_Int;
#define WOLFSSL_ATOMIC_INITIALIZER(x) (x)
#define WOLFSSL_ATOMIC_LOAD(x) __atomic_load_n(&(x), __ATOMIC_CONSUME)
#define WOLFSSL_ATOMIC_STORE(x, val) __atomic_store_n(&(x), val, __ATOMIC_RELEASE)
#define WOLFSSL_ATOMIC_OPS
#endif
#else
#ifdef WOLFSSL_HAVE_ATOMIC_H
/* Default C Implementation */
#include <stdatomic.h>
typedef atomic_int wolfSSL_Atomic_Int;
#define WOLFSSL_ATOMIC_INITIALIZER(x) (x)
#define WOLFSSL_ATOMIC_LOAD(x) atomic_load(&(x))
#define WOLFSSL_ATOMIC_STORE(x, val) atomic_store(&(x), val)
#define WOLFSSL_ATOMIC_OPS
#endif /* WOLFSSL_HAVE_ATOMIC_H */
#endif
#elif defined(_MSC_VER) && !defined(WOLFSSL_NOT_WINDOWS_API)
/* Use MSVC compiler intrinsics for atomic ops */
#ifdef _WIN32_WCE
#include <armintr.h>
#else
#include <intrin.h>
#if defined(WOLFSSL_USER_DEFINED_ATOMICS)
/* user-supplied bindings for wolfSSL_Atomic_Int etc. */
#if !defined(WOLFSSL_ATOMIC_INITIALIZER) || \
!defined(WOLFSSL_ATOMIC_LOAD) || \
!defined(WOLFSSL_ATOMIC_STORE)
#error WOLFSSL_USER_DEFINED_ATOMICS is set but macro(s) are missing.
#else
#define WOLFSSL_ATOMIC_OPS
#endif
#elif defined(SINGLE_THREADED)
typedef int wolfSSL_Atomic_Int;
typedef unsigned int wolfSSL_Atomic_Uint;
#define WOLFSSL_ATOMIC_INITIALIZER(x) (x)
#define WOLFSSL_ATOMIC_LOAD(x) (x)
#define WOLFSSL_ATOMIC_STORE(x, val) (x) = (val)
#define WOLFSSL_ATOMIC_OPS
#elif defined(HAVE_C___ATOMIC)
#ifdef __cplusplus
#if defined(__GNUC__) && defined(__ATOMIC_RELAXED)
/* C++ using direct calls to compiler built-in functions */
typedef volatile int wolfSSL_Atomic_Int;
typedef volatile unsigned int wolfSSL_Atomic_Uint;
#define WOLFSSL_ATOMIC_INITIALIZER(x) (x)
#define WOLFSSL_ATOMIC_LOAD(x) __atomic_load_n(&(x), \
__ATOMIC_CONSUME)
#define WOLFSSL_ATOMIC_STORE(x, val) __atomic_store_n(&(x), \
val, __ATOMIC_RELEASE)
#define WOLFSSL_ATOMIC_OPS
#endif
#else
#ifdef WOLFSSL_HAVE_ATOMIC_H
/* Default C Implementation */
#include <stdatomic.h>
typedef atomic_int wolfSSL_Atomic_Int;
typedef atomic_uint wolfSSL_Atomic_Uint;
#define WOLFSSL_ATOMIC_INITIALIZER(x) (x)
#define WOLFSSL_ATOMIC_LOAD(x) atomic_load(&(x))
#define WOLFSSL_ATOMIC_STORE(x, val) atomic_store(&(x), val)
#define WOLFSSL_ATOMIC_OPS
#endif /* WOLFSSL_HAVE_ATOMIC_H */
#endif
#elif defined(_MSC_VER) && !defined(WOLFSSL_NOT_WINDOWS_API)
/* Use MSVC compiler intrinsics for atomic ops */
#ifdef _WIN32_WCE
#include <armintr.h>
#else
#include <intrin.h>
#endif
typedef volatile long wolfSSL_Atomic_Int;
typedef volatile unsigned long wolfSSL_Atomic_Uint;
#define WOLFSSL_ATOMIC_INITIALIZER(x) (x)
#define WOLFSSL_ATOMIC_LOAD(x) (x)
#define WOLFSSL_ATOMIC_STORE(x, val) (x) = (val)
#define WOLFSSL_ATOMIC_OPS
#endif
typedef volatile long wolfSSL_Atomic_Int;
#ifndef WOLFSSL_ATOMIC_INITIALIZER
/* If we weren't able to implement atomics above, disable them here. */
#define WOLFSSL_NO_ATOMICS
#endif
#endif
#ifdef WOLFSSL_NO_ATOMICS
#define WOLFSSL_ATOMIC_INITIALIZER(x) (x)
#define WOLFSSL_ATOMIC_LOAD(x) (x)
#define WOLFSSL_ATOMIC_STORE(x, val) (x) = (val)
#define WOLFSSL_ATOMIC_OPS
#endif
#endif /* WOLFSSL_NO_ATOMICS */
#if defined(WOLFSSL_ATOMIC_OPS) && !defined(SINGLE_THREADED)
/* WOLFSSL_ATOMIC_COERCE_INT() needs to accept either a regular int or a
* wolfSSL_Atomic_Int as its argument, and evaluate to a regular int.
* Allows a user-supplied override definition with type introspection.
*/
#ifndef WOLFSSL_ATOMIC_COERCE_INT
#define WOLFSSL_ATOMIC_COERCE_INT(x) ((int)(x))
#endif
#ifndef WOLFSSL_ATOMIC_COERCE_UINT
#define WOLFSSL_ATOMIC_COERCE_UINT(x) ((unsigned int)(x))
#endif
#ifdef WOLFSSL_USER_DEFINED_ATOMICS
/* user-supplied bindings for wolfSSL_Atomic_Int_Init(),
* wolfSSL_Atomic_Int_FetchAdd(), etc.
*/
#elif defined(WOLFSSL_ATOMIC_OPS) && !defined(SINGLE_THREADED)
WOLFSSL_API void wolfSSL_Atomic_Int_Init(wolfSSL_Atomic_Int* c, int i);
/* Fetch* functions return the value of the counter immediately preceding
* the effects of the function. */
WOLFSSL_API void wolfSSL_Atomic_Uint_Init(
wolfSSL_Atomic_Uint* c, unsigned int i);
/* FetchOp functions return the value of the counter immediately preceding
* the effects of the operation.
* OpFetch functions return the value of the counter immediately after
* the effects of the operation.
*/
WOLFSSL_API int wolfSSL_Atomic_Int_FetchAdd(wolfSSL_Atomic_Int* c, int i);
WOLFSSL_API int wolfSSL_Atomic_Int_FetchSub(wolfSSL_Atomic_Int* c, int i);
WOLFSSL_API int wolfSSL_Atomic_Int_AddFetch(wolfSSL_Atomic_Int* c, int i);
WOLFSSL_API int wolfSSL_Atomic_Int_SubFetch(wolfSSL_Atomic_Int* c, int i);
WOLFSSL_API int wolfSSL_Atomic_Int_CompareExchange(
wolfSSL_Atomic_Int* c, int *expected_i, int new_i);
WOLFSSL_API unsigned int wolfSSL_Atomic_Uint_FetchAdd(
wolfSSL_Atomic_Uint* c, unsigned int i);
WOLFSSL_API unsigned int wolfSSL_Atomic_Uint_FetchSub(
wolfSSL_Atomic_Uint* c, unsigned int i);
WOLFSSL_API unsigned int wolfSSL_Atomic_Uint_AddFetch(
wolfSSL_Atomic_Uint* c, unsigned int i);
WOLFSSL_API unsigned int wolfSSL_Atomic_Uint_SubFetch(
wolfSSL_Atomic_Uint* c, unsigned int i);
WOLFSSL_API int wolfSSL_Atomic_Uint_CompareExchange(
wolfSSL_Atomic_Uint* c, unsigned int *expected_i, unsigned int new_i);
#else
/* Code using these fallback implementations in non-SINGLE_THREADED builds
* needs to arrange its own explicit fallback to int for wolfSSL_Atomic_Int,
* which is not defined if !defined(WOLFSSL_ATOMIC_OPS) &&
* !defined(SINGLE_THREADED). This forces local awareness of thread-unsafe
* semantics.
* needs to arrange its own explicit fallback to int for wolfSSL_Atomic_Int
* and unsigned int for wolfSSL_Atomic_Uint, which is not defined if
* !defined(WOLFSSL_ATOMIC_OPS) && !defined(SINGLE_THREADED). This forces
* local awareness of thread-unsafe semantics.
*/
#define wolfSSL_Atomic_Int_Init(c, i) (*(c) = (i))
#define wolfSSL_Atomic_Uint_Init(c, i) (*(c) = (i))
static WC_INLINE int wolfSSL_Atomic_Int_FetchAdd(int *c, int i) {
int ret = *c;
*c += i;
@@ -532,6 +593,60 @@
*c -= i;
return ret;
}
static WC_INLINE int wolfSSL_Atomic_Int_AddFetch(int *c, int i) {
return (*c += i);
}
static WC_INLINE int wolfSSL_Atomic_Int_SubFetch(int *c, int i) {
return (*c -= i);
}
static WC_INLINE int wolfSSL_Atomic_Int_CompareExchange(
int *c, int *expected_i, int new_i)
{
if (*c == *expected_i) {
*c = new_i;
return 1;
}
else {
*expected_i = *c;
return 0;
}
}
static WC_INLINE unsigned int wolfSSL_Atomic_Uint_FetchAdd(
unsigned int *c, unsigned int i)
{
unsigned int ret = *c;
*c += i;
return ret;
}
static WC_INLINE unsigned int wolfSSL_Atomic_Uint_FetchSub(
unsigned int *c, unsigned int i)
{
unsigned int ret = *c;
*c -= i;
return ret;
}
static WC_INLINE unsigned int wolfSSL_Atomic_Uint_AddFetch(
unsigned int *c, unsigned int i)
{
return (*c += i);
}
static WC_INLINE unsigned int wolfSSL_Atomic_Uint_SubFetch(
unsigned int *c, unsigned int i)
{
return (*c -= i);
}
static WC_INLINE int wolfSSL_Atomic_Uint_CompareExchange(
unsigned int *c, unsigned int *expected_i, unsigned int new_i)
{
if (*c == *expected_i) {
*c = new_i;
return 1;
}
else {
*expected_i = *c;
return 0;
}
}
#endif
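
For the new WOLFSSL_USER_DEFINED_ATOMICS path earlier in this header, the build only verifies that WOLFSSL_ATOMIC_INITIALIZER, WOLFSSL_ATOMIC_LOAD and WOLFSSL_ATOMIC_STORE are defined; the typedefs and the function-style operations are also the user's responsibility. A hedged sketch of what a user_settings.h might supply, assuming a C11 toolchain and assuming macro bindings are acceptable in place of the function declarations:

/* Hypothetical user-supplied atomics, shown only as an example shape. */
#include <stdatomic.h>

typedef atomic_int  wolfSSL_Atomic_Int;
typedef atomic_uint wolfSSL_Atomic_Uint;

#define WOLFSSL_ATOMIC_INITIALIZER(x)   (x)
#define WOLFSSL_ATOMIC_LOAD(x)          atomic_load(&(x))
#define WOLFSSL_ATOMIC_STORE(x, val)    atomic_store(&(x), (val))

#define wolfSSL_Atomic_Int_Init(c, i)       atomic_init((c), (i))
#define wolfSSL_Atomic_Int_FetchAdd(c, i)   atomic_fetch_add((c), (i))
/* ...the remaining Int/Uint operations (FetchSub, AddFetch, SubFetch,
 * CompareExchange, and the Uint variants) follow the same pattern and are
 * omitted here for brevity... */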
/* Reference counting. */