From bd4e723f9df4514b8e34305a43728b3b0fea17b4 Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Wed, 13 Aug 2025 13:28:28 -0500 Subject: [PATCH 1/2] add cpuid_flags_t, WC_CPUID_INITIALIZER, and cpuid_get_flags_ex(); refactor all static flag initializations to use cpuid_get_flags_ex() for race-free dynamics; refactor cpuid_set_flags() to be race-free; wolfssl/wolfcrypt/wc_port.h and wolfcrypt/src/wc_port.c: add * WOLFSSL_ATOMIC_COERCE_INT() * WOLFSSL_ATOMIC_COERCE_UINT() * wolfSSL_Atomic_Uint * wolfSSL_Atomic_Uint_Init() * wolfSSL_Atomic_Int_AddFetch() * wolfSSL_Atomic_Int_SubFetch() * wolfSSL_Atomic_Int_CompareExchange() * wolfSSL_Atomic_Uint_FetchAdd() * wolfSSL_Atomic_Uint_FetchSub() * wolfSSL_Atomic_Uint_AddFetch() * wolfSSL_Atomic_Uint_SubFetch() * wolfSSL_Atomic_Uint_CompareExchange() wolfcrypt/test/test.c: add to memory_test() tests for all atomic macros and APIs; .github/workflows/pq-all.yml: don't use -Wpedantic for CC=c++ scenario. --- .github/workflows/pq-all.yml | 2 +- tests/api.c | 6 +- wolfcrypt/src/aes.c | 12 +- wolfcrypt/src/chacha.c | 8 +- wolfcrypt/src/cpuid.c | 192 +++++++++++++----------- wolfcrypt/src/dilithium.c | 4 +- wolfcrypt/src/poly1305.c | 8 +- wolfcrypt/src/port/arm/armv8-sha256.c | 13 +- wolfcrypt/src/port/arm/armv8-sha512.c | 13 +- wolfcrypt/src/random.c | 4 +- wolfcrypt/src/sha256.c | 7 +- wolfcrypt/src/sha3.c | 25 ++-- wolfcrypt/src/sha512.c | 7 +- wolfcrypt/src/wc_mlkem_poly.c | 4 +- wolfcrypt/src/wc_port.c | 208 ++++++++++++++++++++++++-- wolfcrypt/test/test.c | 85 +++++++++++ wolfssl/wolfcrypt/cpuid.h | 61 +++++--- wolfssl/wolfcrypt/wc_port.h | 183 +++++++++++++++++----- 18 files changed, 612 insertions(+), 230 deletions(-) diff --git a/.github/workflows/pq-all.yml b/.github/workflows/pq-all.yml index 70fc7dda9..fc32344f6 100644 --- a/.github/workflows/pq-all.yml +++ b/.github/workflows/pq-all.yml @@ -21,7 +21,7 @@ jobs: '--enable-intelasm --enable-sp-asm --enable-mlkem=yes,kyber,ml-kem CPPFLAGS="-DWOLFSSL_ML_KEM_USE_OLD_IDS"', '--enable-intelasm --enable-sp-asm --enable-all --enable-testcert --enable-acert --enable-dtls13 --enable-dtls-mtu --enable-dtls-frag-ch --enable-dtlscid --enable-quic --with-sys-crypto-policy --enable-experimental --enable-kyber=yes,original --enable-lms --enable-xmss --enable-dilithium --enable-dual-alg-certs --disable-qt CPPFLAGS="-pedantic -Wdeclaration-after-statement -DWOLFCRYPT_TEST_LINT -DNO_WOLFSSL_CIPHER_SUITE_TEST -DTEST_LIBWOLFSSL_SOURCES_INCLUSION_SEQUENCE"', '--enable-smallstack --enable-smallstackcache --enable-intelasm --enable-sp-asm --enable-all --enable-testcert --enable-acert --enable-dtls13 --enable-dtls-mtu --enable-dtls-frag-ch --enable-dtlscid --enable-quic --with-sys-crypto-policy --enable-experimental --enable-kyber=yes,original --enable-lms --enable-xmss --enable-dilithium --enable-dual-alg-certs --disable-qt CPPFLAGS="-pedantic -Wdeclaration-after-statement -DWOLFCRYPT_TEST_LINT -DNO_WOLFSSL_CIPHER_SUITE_TEST -DTEST_LIBWOLFSSL_SOURCES_INCLUSION_SEQUENCE"', - '--enable-intelasm --enable-sp-asm --enable-all --enable-testcert --enable-acert --enable-dtls13 --enable-dtls-mtu --enable-dtls-frag-ch --enable-dtlscid --enable-quic --with-sys-crypto-policy --enable-experimental --enable-kyber=yes,original --enable-lms --enable-xmss --enable-dilithium --enable-dual-alg-certs --disable-qt CPPFLAGS="-pedantic -Wdeclaration-after-statement -DWOLFCRYPT_TEST_LINT -DNO_WOLFSSL_CIPHER_SUITE_TEST -DTEST_LIBWOLFSSL_SOURCES_INCLUSION_SEQUENCE" CC=c++' + '--enable-intelasm --enable-sp-asm --enable-all --enable-testcert 
--enable-acert --enable-dtls13 --enable-dtls-mtu --enable-dtls-frag-ch --enable-dtlscid --enable-quic --with-sys-crypto-policy --enable-experimental --enable-kyber=yes,original --enable-lms --enable-xmss --enable-dilithium --enable-dual-alg-certs --disable-qt CPPFLAGS="-Wdeclaration-after-statement -DWOLFCRYPT_TEST_LINT -DNO_WOLFSSL_CIPHER_SUITE_TEST -DTEST_LIBWOLFSSL_SOURCES_INCLUSION_SEQUENCE" CC=c++' ] name: make check if: github.repository_owner == 'wolfssl' diff --git a/tests/api.c b/tests/api.c index 295c671a1..b925b90d1 100644 --- a/tests/api.c +++ b/tests/api.c @@ -42241,10 +42241,10 @@ static int test_wolfSSL_dtls_bad_record(void) !defined(NO_WOLFSSL_CLIENT) && !defined(NO_WOLFSSL_SERVER) && \ defined(HAVE_IO_TESTS_DEPENDENCIES) static volatile int test_AEAD_seq_num = 0; -#ifdef WOLFSSL_ATOMIC_INITIALIZER -wolfSSL_Atomic_Int test_AEAD_done = WOLFSSL_ATOMIC_INITIALIZER(0); -#else +#ifdef WOLFSSL_NO_ATOMICS static volatile int test_AEAD_done = 0; +#else +wolfSSL_Atomic_Int test_AEAD_done = WOLFSSL_ATOMIC_INITIALIZER(0); #endif #ifdef WOLFSSL_MUTEX_INITIALIZER static wolfSSL_Mutex test_AEAD_mutex = WOLFSSL_MUTEX_INITIALIZER(test_AEAD_mutex); diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index de6a428c9..e0e2fdeac 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -624,11 +624,11 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits */ static int checkedAESNI = 0; static int haveAESNI = 0; - static word32 intel_flags = 0; + static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER; static WARN_UNUSED_RESULT int Check_CPU_support_AES(void) { - intel_flags = cpuid_get_flags(); + cpuid_get_flags_ex(&intel_flags); return IS_INTEL_AESNI(intel_flags) != 0; } @@ -786,15 +786,11 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits #define NEED_AES_TABLES - static int checkedCpuIdFlags = 0; - static word32 cpuid_flags = 0; + static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER; static void Check_CPU_support_HwCrypto(Aes* aes) { - if (checkedCpuIdFlags == 0) { - cpuid_flags = cpuid_get_flags(); - checkedCpuIdFlags = 1; - } + cpuid_get_flags_ex(&cpuid_flags); aes->use_aes_hw_crypto = IS_AARCH64_AES(cpuid_flags); #ifdef HAVE_AESGCM aes->use_pmull_hw_crypto = IS_AARCH64_PMULL(cpuid_flags); diff --git a/wolfcrypt/src/chacha.c b/wolfcrypt/src/chacha.c index d42186ec2..1ba0f25ce 100644 --- a/wolfcrypt/src/chacha.c +++ b/wolfcrypt/src/chacha.c @@ -109,8 +109,7 @@ Public domain. 
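For reference, the caller-side shape of this refactor (used in the aes.c hunks above and repeated in chacha.c, poly1305.c, sha256.c, and the other modules below) reduces to the following minimal sketch. The names cpuid_flags_t, WC_CPUID_INITIALIZER, cpuid_get_flags_ex(), and IS_INTEL_AESNI() come from this series; the wrapper function and its name are hypothetical, and an x86 build with HAVE_CPUID_INTEL is assumed.

    #include <wolfssl/wolfcrypt/settings.h>
    #include <wolfssl/wolfcrypt/cpuid.h>

    /* Starts as WC_CPUID_UNINITED_VAL; the first caller fills it in with a
     * single atomic compare-exchange inside cpuid_get_flags_ex(), and every
     * later call reduces to an atomic load of the already-set value. */
    static cpuid_flags_t example_flags = WC_CPUID_INITIALIZER;

    static int example_have_aesni(void)
    {
        (void)cpuid_get_flags_ex(&example_flags); /* race-free lazy init */
        return IS_INTEL_AESNI(example_flags) != 0;
    }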
#define HAVE_INTEL_AVX2 #endif - static int cpuidFlagsSet = 0; - static word32 cpuidFlags = 0; + static cpuid_flags_t cpuidFlags = WC_CPUID_INITIALIZER; #endif /** @@ -332,10 +331,7 @@ int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input, return 0; } - if (!cpuidFlagsSet) { - cpuidFlags = cpuid_get_flags(); - cpuidFlagsSet = 1; - } + cpuid_get_flags_ex(&cpuidFlags); #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_AVX2(cpuidFlags)) { diff --git a/wolfcrypt/src/cpuid.c b/wolfcrypt/src/cpuid.c index 978cbf536..4b1882dc3 100644 --- a/wolfcrypt/src/cpuid.c +++ b/wolfcrypt/src/cpuid.c @@ -25,8 +25,7 @@ #if defined(HAVE_CPUID) || defined(HAVE_CPUID_INTEL) || \ defined(HAVE_CPUID_AARCH64) - static word32 cpuid_check = 0; - static word32 cpuid_flags = 0; + static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER; #endif #ifdef HAVE_CPUID_INTEL @@ -81,21 +80,22 @@ } - void cpuid_set_flags(void) + static WC_INLINE void cpuid_set_flags(void) { - if (!cpuid_check) { - if (cpuid_flag(1, 0, ECX, 28)) { cpuid_flags |= CPUID_AVX1 ; } - if (cpuid_flag(7, 0, EBX, 5)) { cpuid_flags |= CPUID_AVX2 ; } - if (cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; } - if (cpuid_flag(1, 0, ECX, 30)) { cpuid_flags |= CPUID_RDRAND; } - if (cpuid_flag(7, 0, EBX, 18)) { cpuid_flags |= CPUID_RDSEED; } - if (cpuid_flag(1, 0, ECX, 25)) { cpuid_flags |= CPUID_AESNI ; } - if (cpuid_flag(7, 0, EBX, 19)) { cpuid_flags |= CPUID_ADX ; } - if (cpuid_flag(1, 0, ECX, 22)) { cpuid_flags |= CPUID_MOVBE ; } - if (cpuid_flag(7, 0, EBX, 3)) { cpuid_flags |= CPUID_BMI1 ; } - if (cpuid_flag(7, 0, EBX, 29)) { cpuid_flags |= CPUID_SHA ; } - - cpuid_check = 1; + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { + word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; + if (cpuid_flag(1, 0, ECX, 28)) { new_cpuid_flags |= CPUID_AVX1 ; } + if (cpuid_flag(7, 0, EBX, 5)) { new_cpuid_flags |= CPUID_AVX2 ; } + if (cpuid_flag(7, 0, EBX, 8)) { new_cpuid_flags |= CPUID_BMI2 ; } + if (cpuid_flag(1, 0, ECX, 30)) { new_cpuid_flags |= CPUID_RDRAND; } + if (cpuid_flag(7, 0, EBX, 18)) { new_cpuid_flags |= CPUID_RDSEED; } + if (cpuid_flag(1, 0, ECX, 25)) { new_cpuid_flags |= CPUID_AESNI ; } + if (cpuid_flag(7, 0, EBX, 19)) { new_cpuid_flags |= CPUID_ADX ; } + if (cpuid_flag(1, 0, ECX, 22)) { new_cpuid_flags |= CPUID_MOVBE ; } + if (cpuid_flag(7, 0, EBX, 3)) { new_cpuid_flags |= CPUID_BMI1 ; } + if (cpuid_flag(7, 0, EBX, 29)) { new_cpuid_flags |= CPUID_SHA ; } + (void)wolfSSL_Atomic_Uint_CompareExchange + (&cpuid_flags, &old_cpuid_flags, new_cpuid_flags); } } #elif defined(HAVE_CPUID_AARCH64) @@ -113,9 +113,10 @@ /* https://developer.arm.com/documentation/ddi0601/2024-09/AArch64-Registers * /ID-AA64ISAR0-EL1--AArch64-Instruction-Set-Attribute-Register-0 */ - void cpuid_set_flags(void) + static WC_INLINE void cpuid_set_flags(void) { - if (!cpuid_check) { + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { + word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; word64 features; __asm__ __volatile ( @@ -126,25 +127,26 @@ ); if (features & CPUID_AARCH64_FEAT_AES) - cpuid_flags |= CPUID_AES; + new_cpuid_flags |= CPUID_AES; if (features & CPUID_AARCH64_FEAT_AES_PMULL) { - cpuid_flags |= CPUID_AES; - cpuid_flags |= CPUID_PMULL; + new_cpuid_flags |= CPUID_AES; + new_cpuid_flags |= CPUID_PMULL; } if (features & CPUID_AARCH64_FEAT_SHA256) - cpuid_flags |= CPUID_SHA256; + new_cpuid_flags |= CPUID_SHA256; if (features & CPUID_AARCH64_FEAT_SHA256_512) - cpuid_flags |= CPUID_SHA256 | CPUID_SHA512; + 
new_cpuid_flags |= CPUID_SHA256 | CPUID_SHA512; if (features & CPUID_AARCH64_FEAT_RDM) - cpuid_flags |= CPUID_RDM; + new_cpuid_flags |= CPUID_RDM; if (features & CPUID_AARCH64_FEAT_SHA3) - cpuid_flags |= CPUID_SHA3; + new_cpuid_flags |= CPUID_SHA3; if (features & CPUID_AARCH64_FEAT_SM3) - cpuid_flags |= CPUID_SM3; + new_cpuid_flags |= CPUID_SM3; if (features & CPUID_AARCH64_FEAT_SM4) - cpuid_flags |= CPUID_SM4; + new_cpuid_flags |= CPUID_SM4; - cpuid_check = 1; + (void)wolfSSL_Atomic_Uint_CompareExchange + (&cpuid_flags, &old_cpuid_flags, new_cpuid_flags); } } #elif defined(__linux__) @@ -154,42 +156,44 @@ #include #include - void cpuid_set_flags(void) + static WC_INLINE void cpuid_set_flags(void) { - if (!cpuid_check) { + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { + word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; word64 hwcaps = getauxval(AT_HWCAP); #ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO if (hwcaps & HWCAP_AES) - cpuid_flags |= CPUID_AES; + new_cpuid_flags |= CPUID_AES; if (hwcaps & HWCAP_PMULL) - cpuid_flags |= CPUID_PMULL; + new_cpuid_flags |= CPUID_PMULL; if (hwcaps & HWCAP_SHA2) - cpuid_flags |= CPUID_SHA256; + new_cpuid_flags |= CPUID_SHA256; #endif #ifdef WOLFSSL_ARMASM_CRYPTO_SHA512 if (hwcaps & HWCAP_SHA512) - cpuid_flags |= CPUID_SHA512; + new_cpuid_flags |= CPUID_SHA512; #endif #if defined(HWCAP_ASIMDRDM) && !defined(WOLFSSL_AARCH64_NO_SQRDMLSH) if (hwcaps & HWCAP_ASIMDRDM) - cpuid_flags |= CPUID_RDM; + new_cpuid_flags |= CPUID_RDM; #endif #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 if (hwcaps & HWCAP_SHA3) - cpuid_flags |= CPUID_SHA3; + new_cpuid_flags |= CPUID_SHA3; #endif #ifdef WOLFSSL_ARMASM_CRYPTO_SM3 if (hwcaps & HWCAP_SM3) - cpuid_flags |= CPUID_SM3; + new_cpuid_flags |= CPUID_SM3; #endif #ifdef WOLFSSL_ARMASM_CRYPTO_SM4 if (hwcaps & HWCAP_SM4) - cpuid_flags |= CPUID_SM4; + new_cpuid_flags |= CPUID_SM4; #endif (void)hwcaps; - cpuid_check = 1; + (void)wolfSSL_Atomic_Uint_CompareExchange + (&cpuid_flags, &old_cpuid_flags, new_cpuid_flags); } } #elif defined(__ANDROID__) || defined(ANDROID) @@ -198,19 +202,21 @@ #include "cpu-features.h" - void cpuid_set_flags(void) + static WC_INLINE void cpuid_set_flags(void) { - if (!cpuid_check) { + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { + word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; word64 features = android_getCpuFeatures(); if (features & ANDROID_CPU_ARM_FEATURE_AES) - cpuid_flags |= CPUID_AES; + new_cpuid_flags |= CPUID_AES; if (features & ANDROID_CPU_ARM_FEATURE_PMULL) - cpuid_flags |= CPUID_PMULL; + new_cpuid_flags |= CPUID_PMULL; if (features & ANDROID_CPU_ARM_FEATURE_SHA2) - cpuid_flags |= CPUID_SHA256; + new_cpuid_flags |= CPUID_SHA256; - cpuid_check = 1; + (void)wolfSSL_Atomic_Uint_CompareExchange + (&cpuid_flags, &old_cpuid_flags, new_cpuid_flags); } } #elif defined(__APPLE__) @@ -229,29 +235,31 @@ return ret; } - void cpuid_set_flags(void) + static WC_INLINE void cpuid_set_flags(void) { - if (!cpuid_check) { + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { + word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_AES") != 0) - cpuid_flags |= CPUID_AES; + new_cpuid_flags |= CPUID_AES; if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_PMULL") != 0) - cpuid_flags |= CPUID_PMULL; + new_cpuid_flags |= CPUID_PMULL; if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA256") != 0) - cpuid_flags |= CPUID_SHA256; + new_cpuid_flags |= CPUID_SHA256; if 
(cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA512") != 0) - cpuid_flags |= CPUID_SHA512; + new_cpuid_flags |= CPUID_SHA512; if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_RDM") != 0) - cpuid_flags |= CPUID_RDM; + new_cpuid_flags |= CPUID_RDM; if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA3") != 0) - cpuid_flags |= CPUID_SHA3; + new_cpuid_flags |= CPUID_SHA3; #ifdef WOLFSSL_ARMASM_CRYPTO_SM3 - cpuid_flags |= CPUID_SM3; + new_cpuid_flags |= CPUID_SM3; #endif #ifdef WOLFSSL_ARMASM_CRYPTO_SM4 - cpuid_flags |= CPUID_SM4; + new_cpuid_flags |= CPUID_SM4; #endif - cpuid_check = 1; + (void)wolfSSL_Atomic_Uint_CompareExchange + (&cpuid_flags, &old_cpuid_flags, new_cpuid_flags); } } #elif defined(__FreeBSD__) || defined(__OpenBSD__) @@ -259,70 +267,75 @@ #include - void cpuid_set_flags(void) + static WC_INLINE void cpuid_set_flags(void) { - if (!cpuid_check) { + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { + word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; word64 features = 0; elf_aux_info(AT_HWCAP, &features, sizeof(features)); if (features & CPUID_AARCH64_FEAT_AES) - cpuid_flags |= CPUID_AES; + new_cpuid_flags |= CPUID_AES; if (features & CPUID_AARCH64_FEAT_AES_PMULL) { - cpuid_flags |= CPUID_AES; - cpuid_flags |= CPUID_PMULL; + new_cpuid_flags |= CPUID_AES; + new_cpuid_flags |= CPUID_PMULL; } if (features & CPUID_AARCH64_FEAT_SHA256) - cpuid_flags |= CPUID_SHA256; + new_cpuid_flags |= CPUID_SHA256; if (features & CPUID_AARCH64_FEAT_SHA256_512) - cpuid_flags |= CPUID_SHA256 | CPUID_SHA512; + new_cpuid_flags |= CPUID_SHA256 | CPUID_SHA512; if (features & CPUID_AARCH64_FEAT_RDM) - cpuid_flags |= CPUID_RDM; + new_cpuid_flags |= CPUID_RDM; if (features & CPUID_AARCH64_FEAT_SHA3) - cpuid_flags |= CPUID_SHA3; + new_cpuid_flags |= CPUID_SHA3; if (features & CPUID_AARCH64_FEAT_SM3) - cpuid_flags |= CPUID_SM3; + new_cpuid_flags |= CPUID_SM3; if (features & CPUID_AARCH64_FEAT_SM4) - cpuid_flags |= CPUID_SM4; + new_cpuid_flags |= CPUID_SM4; - cpuid_check = 1; + (void)wolfSSL_Atomic_Uint_CompareExchange + (&cpuid_flags, &old_cpuid_flags, new_cpuid_flags); } } #else - void cpuid_set_flags(void) + static WC_INLINE void cpuid_set_flags(void) { - if (!cpuid_check) { + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { + word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; #ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO - cpuid_flags |= CPUID_AES; - cpuid_flags |= CPUID_PMULL; - cpuid_flags |= CPUID_SHA256; + new_cpuid_flags |= CPUID_AES; + new_cpuid_flags |= CPUID_PMULL; + new_cpuid_flags |= CPUID_SHA256; #endif #ifdef WOLFSSL_ARMASM_CRYPTO_SHA512 - cpuid_flags |= CPUID_SHA512; + new_cpuid_flags |= CPUID_SHA512; #endif #ifndef WOLFSSL_AARCH64_NO_SQRDMLSH - cpuid_flags |= CPUID_RDM; + new_cpuid_flags |= CPUID_RDM; #endif #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 - cpuid_flags |= CPUID_SHA3; + new_cpuid_flags |= CPUID_SHA3; #endif #ifdef WOLFSSL_ARMASM_CRYPTO_SM3 - cpuid_flags |= CPUID_SM3; + new_cpuid_flags |= CPUID_SM3; #endif #ifdef WOLFSSL_ARMASM_CRYPTO_SM4 - cpuid_flags |= CPUID_SM4; + new_cpuid_flags |= CPUID_SM4; #endif - cpuid_check = 1; + (void)wolfSSL_Atomic_Uint_CompareExchange + (&cpuid_flags, &old_cpuid_flags, new_cpuid_flags); } } #endif #elif defined(HAVE_CPUID) - void cpuid_set_flags(void) + static WC_INLINE void cpuid_set_flags(void) { - if (!cpuid_check) { - cpuid_flags = 0; - cpuid_check = 1; + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { + word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; + 
(void)wolfSSL_Atomic_Uint_CompareExchange + (&cpuid_flags, &old_cpuid_flags, new_cpuid_flags); } } #endif @@ -331,24 +344,25 @@ word32 cpuid_get_flags(void) { - if (!cpuid_check) - cpuid_set_flags(); - return cpuid_flags; + cpuid_set_flags(); + return WOLFSSL_ATOMIC_LOAD(cpuid_flags); } void cpuid_select_flags(word32 flags) { - cpuid_flags = flags; + WOLFSSL_ATOMIC_STORE(cpuid_flags, flags); } void cpuid_set_flag(word32 flag) { - cpuid_flags |= flag; + WOLFSSL_ATOMIC_STORE + (cpuid_flags, WOLFSSL_ATOMIC_LOAD(cpuid_flags) | flag); } void cpuid_clear_flag(word32 flag) { - cpuid_flags &= ~flag; + WOLFSSL_ATOMIC_STORE + (cpuid_flags, WOLFSSL_ATOMIC_LOAD(cpuid_flags) & ~flag); } #endif /* HAVE_CPUID */ diff --git a/wolfcrypt/src/dilithium.c b/wolfcrypt/src/dilithium.c index 737b3c637..0e2b32b9d 100644 --- a/wolfcrypt/src/dilithium.c +++ b/wolfcrypt/src/dilithium.c @@ -169,7 +169,7 @@ #ifdef WOLFSSL_WC_DILITHIUM #if defined(USE_INTEL_SPEEDUP) -static word32 cpuid_flags = 0; +static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER; #endif #ifdef DEBUG_DILITHIUM @@ -10623,7 +10623,7 @@ int wc_dilithium_init_ex(dilithium_key* key, void* heap, int devId) } #if defined(WOLFSSL_WC_DILITHIUM) && defined(USE_INTEL_SPEEDUP) - cpuid_flags = cpuid_get_flags(); + cpuid_get_flags_ex(&cpuid_flags); #endif return ret; diff --git a/wolfcrypt/src/poly1305.c b/wolfcrypt/src/poly1305.c index a3e009317..80286eb97 100644 --- a/wolfcrypt/src/poly1305.c +++ b/wolfcrypt/src/poly1305.c @@ -83,8 +83,7 @@ and Daniel J. Bernstein #endif #ifdef USE_INTEL_POLY1305_SPEEDUP -static word32 intel_flags = 0; -static word32 cpu_flags_set = 0; +static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER; #endif #if defined(USE_INTEL_POLY1305_SPEEDUP) || defined(POLY130564) @@ -513,10 +512,7 @@ int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz) return BAD_FUNC_ARG; #ifdef USE_INTEL_POLY1305_SPEEDUP - if (!cpu_flags_set) { - intel_flags = cpuid_get_flags(); - cpu_flags_set = 1; - } + cpuid_get_flags_ex(&intel_flags); SAVE_VECTOR_REGISTERS(return _svr_ret;); #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_AVX2(intel_flags)) diff --git a/wolfcrypt/src/port/arm/armv8-sha256.c b/wolfcrypt/src/port/arm/armv8-sha256.c index cbd4ee6dc..e9f0f1e32 100644 --- a/wolfcrypt/src/port/arm/armv8-sha256.c +++ b/wolfcrypt/src/port/arm/armv8-sha256.c @@ -274,8 +274,7 @@ static void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, #endif #if defined(__aarch64__) && !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) -static word32 cpuid_flags = 0; -static int cpuid_flags_set = 0; +static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER; #endif static int InitSha256(wc_Sha256* sha256) @@ -1763,10 +1762,7 @@ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) #endif #if defined(__aarch64__) && !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (!cpuid_flags_set) { - cpuid_flags = cpuid_get_flags(); - cpuid_flags_set = 1; - } + cpuid_get_flags_ex(&cpuid_flags); #endif (void)devId; @@ -2048,10 +2044,7 @@ int wc_Sha256HashBlock(wc_Sha256* sha256, const unsigned char* data, sha224->heap = heap; #if defined(__aarch64__) && !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - if (!cpuid_flags_set) { - cpuid_flags = cpuid_get_flags(); - cpuid_flags_set = 1; - } + cpuid_get_flags_ex(&cpuid_flags); #endif (void)devId; diff --git a/wolfcrypt/src/port/arm/armv8-sha512.c b/wolfcrypt/src/port/arm/armv8-sha512.c index cc51c900d..57020f01c 100644 --- a/wolfcrypt/src/port/arm/armv8-sha512.c +++ b/wolfcrypt/src/port/arm/armv8-sha512.c @@ -57,8 +57,7 @@ #endif #if defined(__aarch64__) 
&& defined(WOLFSSL_ARMASM_CRYPTO_SHA512) -static word32 cpuid_flags = 0; -static int cpuid_flags_set = 0; +static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER; #endif #ifdef WOLFSSL_SHA512 @@ -198,10 +197,7 @@ static int InitSha512_Family(wc_Sha512* sha512, void* heap, int devId, return ret; #if defined(__aarch64__) && defined(WOLFSSL_ARMASM_CRYPTO_SHA512) - if (!cpuid_flags_set) { - cpuid_flags = cpuid_get_flags(); - cpuid_flags_set = 1; - } + cpuid_get_flags_ex(&cpuid_flags); #endif (void)devId; @@ -884,10 +880,7 @@ int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId) #endif #if defined(__aarch64__) && defined(WOLFSSL_ARMASM_CRYPTO_SHA512) - if (!cpuid_flags_set) { - cpuid_flags = cpuid_get_flags(); - cpuid_flags_set = 1; - } + cpuid_get_flags_ex(&cpuid_flags); #endif (void)devId; diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c index 2ec56b212..371fe0c1d 100644 --- a/wolfcrypt/src/random.c +++ b/wolfcrypt/src/random.c @@ -184,10 +184,10 @@ This library contains implementation for the random number generator. #if defined(HAVE_INTEL_RDRAND) || defined(HAVE_INTEL_RDSEED) || \ defined(HAVE_AMD_RDSEED) - static word32 intel_flags = 0; + static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER; static void wc_InitRng_IntelRD(void) { - intel_flags = cpuid_get_flags(); + cpuid_get_flags_ex(&intel_flags); } #if defined(HAVE_INTEL_RDSEED) || defined(HAVE_AMD_RDSEED) static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz); diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index f8037d4f2..6fc71d9a5 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -388,7 +388,7 @@ static int InitSha256(wc_Sha256* sha256) } /* extern "C" */ #endif - static word32 intel_flags = 0; + static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER; #if defined(WC_C_DYNAMIC_FALLBACK) && !defined(WC_NO_INTERNAL_FUNCTION_POINTERS) #define WC_NO_INTERNAL_FUNCTION_POINTERS @@ -425,8 +425,7 @@ static int InitSha256(wc_Sha256* sha256) } #endif - if (intel_flags == 0) - intel_flags = cpuid_get_flags(); + cpuid_get_flags_ex(&intel_flags); if (IS_INTEL_SHA(intel_flags)) { #ifdef HAVE_INTEL_AVX1 @@ -601,7 +600,7 @@ static int InitSha256(wc_Sha256* sha256) if (transform_check) return; - intel_flags = cpuid_get_flags(); + cpuid_get_flags_ex(&intel_flags); if (IS_INTEL_SHA(intel_flags)) { #ifdef HAVE_INTEL_AVX1 diff --git a/wolfcrypt/src/sha3.c b/wolfcrypt/src/sha3.c index 32a37a716..7ed2272a2 100644 --- a/wolfcrypt/src/sha3.c +++ b/wolfcrypt/src/sha3.c @@ -67,8 +67,7 @@ defined(WOLFSSL_ARMASM)) #include - word32 cpuid_flags; - int cpuid_flags_set = 0; + static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER; #ifdef WC_C_DYNAMIC_FALLBACK #define SHA3_BLOCK (sha3->sha3_block) #define SHA3_BLOCK_N (sha3->sha3_block_n) @@ -612,17 +611,19 @@ static int InitSha3(wc_Sha3* sha3) #endif #ifdef USE_INTEL_SPEEDUP - if (!cpuid_flags_set) { - cpuid_flags = cpuid_get_flags(); - cpuid_flags_set = 1; -#ifdef WC_C_DYNAMIC_FALLBACK - } { + int cpuid_flags_were_updated = cpuid_get_flags_ex(&cpuid_flags); +#ifdef WC_C_DYNAMIC_FALLBACK + (void)cpuid_flags_were_updated; if (! CAN_SAVE_VECTOR_REGISTERS()) { SHA3_BLOCK = BlockSha3; SHA3_BLOCK_N = NULL; } else +#else + if ((! 
cpuid_flags_were_updated) && (SHA3_BLOCK != NULL)) { + } + else #endif if (IS_INTEL_AVX2(cpuid_flags)) { SHA3_BLOCK = sha3_block_avx2; @@ -638,11 +639,13 @@ static int InitSha3(wc_Sha3* sha3) } } #define SHA3_FUNC_PTR -#endif +#endif /* USE_INTEL_SPEEDUP */ #if defined(__aarch64__) && defined(WOLFSSL_ARMASM) - if (!cpuid_flags_set) { - cpuid_flags = cpuid_get_flags(); - cpuid_flags_set = 1; + { + int cpuid_flags_were_updated = cpuid_get_flags_ex(&cpuid_flags); + if ((! cpuid_flags_were_updated) && (SHA3_BLOCK != NULL)) { + } + else #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 if (IS_AARCH64_SHA3(cpuid_flags)) { SHA3_BLOCK = BlockSha3_crypto; diff --git a/wolfcrypt/src/sha512.c b/wolfcrypt/src/sha512.c index bf02483a6..60d30b7b6 100644 --- a/wolfcrypt/src/sha512.c +++ b/wolfcrypt/src/sha512.c @@ -544,7 +544,7 @@ static int InitSha512_256(wc_Sha512* sha512) } /* extern "C" */ #endif - static word32 intel_flags = 0; + static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER; #if defined(WC_C_DYNAMIC_FALLBACK) && !defined(WC_NO_INTERNAL_FUNCTION_POINTERS) #define WC_NO_INTERNAL_FUNCTION_POINTERS @@ -582,8 +582,7 @@ static int InitSha512_256(wc_Sha512* sha512) } #endif - if (intel_flags == 0) - intel_flags = cpuid_get_flags(); + cpuid_get_flags_ex(&intel_flags); #if defined(HAVE_INTEL_AVX2) if (IS_INTEL_AVX2(intel_flags)) { @@ -724,7 +723,7 @@ static int InitSha512_256(wc_Sha512* sha512) if (transform_check) return; - intel_flags = cpuid_get_flags(); + cpuid_get_flags_ex(&intel_flags); #if defined(HAVE_INTEL_AVX2) if (IS_INTEL_AVX2(intel_flags)) { diff --git a/wolfcrypt/src/wc_mlkem_poly.c b/wolfcrypt/src/wc_mlkem_poly.c index 78d3bb9ff..9f8184de3 100644 --- a/wolfcrypt/src/wc_mlkem_poly.c +++ b/wolfcrypt/src/wc_mlkem_poly.c @@ -103,7 +103,7 @@ extern volatile sword16 mlkem_opt_blocker; #if defined(USE_INTEL_SPEEDUP) || (defined(__aarch64__) && \ defined(WOLFSSL_ARMASM)) -static word32 cpuid_flags = 0; +static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER; #endif /* Half of Q plus one. Converted message bit value of 1. 
*/ @@ -1243,7 +1243,7 @@ void mlkem_init(void) { #if defined(USE_INTEL_SPEEDUP) || (defined(__aarch64__) && \ defined(WOLFSSL_ARMASM)) - cpuid_flags = cpuid_get_flags(); + cpuid_get_flags_ex(&cpuid_flags); #endif } diff --git a/wolfcrypt/src/wc_port.c b/wolfcrypt/src/wc_port.c index f00719e29..76ea14651 100644 --- a/wolfcrypt/src/wc_port.c +++ b/wolfcrypt/src/wc_port.c @@ -1282,6 +1282,11 @@ void wolfSSL_Atomic_Int_Init(wolfSSL_Atomic_Int* c, int i) *c = i; } +void wolfSSL_Atomic_Uint_Init(wolfSSL_Atomic_Uint* c, unsigned int i) +{ + *c = i; +} + int wolfSSL_Atomic_Int_FetchAdd(wolfSSL_Atomic_Int* c, int i) { return __atomic_fetch_add(c, i, __ATOMIC_RELAXED); @@ -1291,13 +1296,68 @@ int wolfSSL_Atomic_Int_FetchSub(wolfSSL_Atomic_Int* c, int i) { return __atomic_fetch_sub(c, i, __ATOMIC_RELAXED); } + +int wolfSSL_Atomic_Int_AddFetch(wolfSSL_Atomic_Int* c, int i) +{ + return __atomic_add_fetch(c, i, __ATOMIC_RELAXED); +} + +int wolfSSL_Atomic_Int_SubFetch(wolfSSL_Atomic_Int* c, int i) +{ + return __atomic_sub_fetch(c, i, __ATOMIC_RELAXED); +} + +int wolfSSL_Atomic_Int_CompareExchange(wolfSSL_Atomic_Int* c, int *expected_i, + int new_i) +{ + return __atomic_compare_exchange_n(c, expected_i, new_i, 0 /* weak */, + __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); +} + +unsigned int wolfSSL_Atomic_Uint_FetchAdd(wolfSSL_Atomic_Uint* c, + unsigned int i) +{ + return __atomic_fetch_add(c, i, __ATOMIC_RELAXED); +} + +unsigned int wolfSSL_Atomic_Uint_FetchSub(wolfSSL_Atomic_Uint* c, + unsigned int i) +{ + return __atomic_fetch_sub(c, i, __ATOMIC_RELAXED); +} + +unsigned int wolfSSL_Atomic_Uint_AddFetch(wolfSSL_Atomic_Uint* c, + unsigned int i) +{ + return __atomic_add_fetch(c, i, __ATOMIC_RELAXED); +} + +unsigned int wolfSSL_Atomic_Uint_SubFetch(wolfSSL_Atomic_Uint* c, + unsigned int i) +{ + return __atomic_sub_fetch(c, i, __ATOMIC_RELAXED); +} + +int wolfSSL_Atomic_Uint_CompareExchange( + wolfSSL_Atomic_Uint* c, unsigned int *expected_i, unsigned int new_i) +{ + return __atomic_compare_exchange_n( + c, expected_i, new_i, 0 /* weak */, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); +} + #else + /* Default C Implementation */ void wolfSSL_Atomic_Int_Init(wolfSSL_Atomic_Int* c, int i) { atomic_init(c, i); } +void wolfSSL_Atomic_Uint_Init(wolfSSL_Atomic_Uint* c, unsigned int i) +{ + atomic_init(c, i); +} + int wolfSSL_Atomic_Int_FetchAdd(wolfSSL_Atomic_Int* c, int i) { return atomic_fetch_add_explicit(c, i, memory_order_relaxed); @@ -1307,16 +1367,73 @@ int wolfSSL_Atomic_Int_FetchSub(wolfSSL_Atomic_Int* c, int i) { return atomic_fetch_sub_explicit(c, i, memory_order_relaxed); } + +int wolfSSL_Atomic_Int_AddFetch(wolfSSL_Atomic_Int* c, int i) +{ + int ret = atomic_fetch_add_explicit(c, i, memory_order_relaxed); + return ret + i; +} + +int wolfSSL_Atomic_Int_SubFetch(wolfSSL_Atomic_Int* c, int i) +{ + int ret = atomic_fetch_sub_explicit(c, i, memory_order_relaxed); + return ret - i; +} + +int wolfSSL_Atomic_Int_CompareExchange( + wolfSSL_Atomic_Int* c, int *expected_i, int new_i) +{ + return atomic_compare_exchange_strong_explicit( + c, expected_i, new_i, memory_order_seq_cst, memory_order_acquire); +} + +unsigned int wolfSSL_Atomic_Uint_FetchAdd(wolfSSL_Atomic_Uint* c, + unsigned int i) +{ + return atomic_fetch_add_explicit(c, i, memory_order_relaxed); +} + +unsigned int wolfSSL_Atomic_Uint_FetchSub(wolfSSL_Atomic_Uint* c, + unsigned int i) +{ + return atomic_fetch_sub_explicit(c, i, memory_order_relaxed); +} + +unsigned int wolfSSL_Atomic_Uint_AddFetch(wolfSSL_Atomic_Uint* c, + unsigned int i) +{ + unsigned int ret = 
atomic_fetch_add_explicit(c, i, memory_order_relaxed); + return ret + i; +} + +unsigned int wolfSSL_Atomic_Uint_SubFetch(wolfSSL_Atomic_Uint* c, + unsigned int i) +{ + unsigned int ret = atomic_fetch_sub_explicit(c, i, memory_order_relaxed); + return ret - i; +} + +int wolfSSL_Atomic_Uint_CompareExchange( + wolfSSL_Atomic_Uint* c, unsigned int *expected_i, unsigned int new_i) +{ + return atomic_compare_exchange_strong_explicit( + c, expected_i, new_i, memory_order_seq_cst, memory_order_acquire); +} + #endif /* __cplusplus */ #elif defined(_MSC_VER) -/* Default C Implementation */ void wolfSSL_Atomic_Int_Init(wolfSSL_Atomic_Int* c, int i) { *c = i; } +void wolfSSL_Atomic_Uint_Init(wolfSSL_Atomic_Uint* c, unsigned int i) +{ + *c = i; +} + int wolfSSL_Atomic_Int_FetchAdd(wolfSSL_Atomic_Int* c, int i) { return (int)_InterlockedExchangeAdd(c, (long)i); @@ -1327,6 +1444,76 @@ int wolfSSL_Atomic_Int_FetchSub(wolfSSL_Atomic_Int* c, int i) return (int)_InterlockedExchangeAdd(c, (long)-i); } +int wolfSSL_Atomic_Int_AddFetch(wolfSSL_Atomic_Int* c, int i) +{ + int ret = (int)_InterlockedExchangeAdd(c, (long)i); + return ret + i; +} + +int wolfSSL_Atomic_Int_SubFetch(wolfSSL_Atomic_Int* c, int i) +{ + int ret = (int)_InterlockedExchangeAdd(c, (long)-i); + return ret - i; +} + +int wolfSSL_Atomic_Int_CompareExchange(wolfSSL_Atomic_Int* c, int *expected_i, + int new_i) +{ + long actual_i = InterlockedCompareExchange(c, (long)new_i, + (long)*expected_i); + if (actual_i == (long)*expected_i) { + return 1; + } + else { + *expected_i = (int)actual_i; + return 0; + } +} + +unsigned int wolfSSL_Atomic_Uint_FetchAdd(wolfSSL_Atomic_Uint* c, + unsigned int i) +{ + return (unsigned int)_InterlockedExchangeAdd((wolfSSL_Atomic_Int *)c, + (long)i); +} + +unsigned int wolfSSL_Atomic_Uint_FetchSub(wolfSSL_Atomic_Uint* c, + unsigned int i) +{ + return (unsigned int)_InterlockedExchangeAdd((wolfSSL_Atomic_Int *)c, + -(long)i); +} + +unsigned int wolfSSL_Atomic_Uint_AddFetch(wolfSSL_Atomic_Uint* c, + unsigned int i) +{ + unsigned int ret = (unsigned int)_InterlockedExchangeAdd + ((wolfSSL_Atomic_Int *)c, (long)i); + return ret + i; +} + +unsigned int wolfSSL_Atomic_Uint_SubFetch(wolfSSL_Atomic_Uint* c, + unsigned int i) +{ + unsigned int ret = (unsigned int)_InterlockedExchangeAdd + ((wolfSSL_Atomic_Int *)c, -(long)i); + return ret - i; +} + +int wolfSSL_Atomic_Uint_CompareExchange( + wolfSSL_Atomic_Uint* c, unsigned int *expected_i, unsigned int new_i) +{ + long actual_i = InterlockedCompareExchange + ((wolfSSL_Atomic_Int *)c, (long)new_i, (long)*expected_i); + if (actual_i == (long)*expected_i) { + return 1; + } + else { + *expected_i = (unsigned int)actual_i; + return 0; + } +} + #endif #endif /* WOLFSSL_ATOMIC_OPS */ @@ -1395,7 +1582,8 @@ void wolfSSL_RefWithMutexDec(wolfSSL_RefWithMutex* ref, int* isZero, int* err) #if WOLFSSL_CRYPT_HW_MUTEX /* Mutex for protection of cryptography hardware */ -static wolfSSL_Mutex wcCryptHwMutex WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwMutex); +static wolfSSL_Mutex wcCryptHwMutex + WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwMutex); #ifndef WOLFSSL_MUTEX_INITIALIZER static int wcCryptHwMutexInit = 0; #endif @@ -1437,20 +1625,20 @@ int wolfSSL_CryptHwMutexUnLock(void) #if WOLFSSL_CRYPT_HW_MUTEX && defined(WOLFSSL_ALGO_HW_MUTEX) /* Mutex for protection of cryptography hardware */ #ifndef NO_RNG_MUTEX -static wolfSSL_Mutex wcCryptHwRngMutex \ - WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwRngMutex); +static wolfSSL_Mutex wcCryptHwRngMutex + 
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwRngMutex); #endif /* NO_RNG_MUTEX */ #ifndef NO_AES_MUTEX -static wolfSSL_Mutex wcCryptHwAesMutex \ - WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwAesMutex); +static wolfSSL_Mutex wcCryptHwAesMutex + WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwAesMutex); #endif /* NO_AES_MUTEX */ #ifndef NO_HASH_MUTEX -static wolfSSL_Mutex wcCryptHwHashMutex \ - WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwHashMutex); +static wolfSSL_Mutex wcCryptHwHashMutex + WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwHashMutex); #endif /* NO_HASH_MUTEX */ #ifndef NO_PK_MUTEX -static wolfSSL_Mutex wcCryptHwPkMutex \ - WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwPkMutex); +static wolfSSL_Mutex wcCryptHwPkMutex + WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwPkMutex); #endif /* NO_PK_MUTEX */ #ifndef WOLFSSL_MUTEX_INITIALIZER diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index fc0537adb..f58d876cf 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -18892,6 +18892,91 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t memory_test(void) if (const_byte_ptr_test(const_byte_array, &j) != CBPTR_EXPECTED) { ret = 1; } + if (ret != 0) + return WC_TEST_RET_ENC_NC; + } + + { + +#ifdef WOLFSSL_NO_ATOMICS + int a_int = WOLFSSL_ATOMIC_INITIALIZER(-2); + unsigned int a_uint = WOLFSSL_ATOMIC_INITIALIZER(2); +#else + wolfSSL_Atomic_Int a_int = WOLFSSL_ATOMIC_INITIALIZER(-2); + wolfSSL_Atomic_Uint a_uint = WOLFSSL_ATOMIC_INITIALIZER(2); +#endif + int int_expected; + unsigned int uint_expected; + + if (WOLFSSL_ATOMIC_LOAD(a_int) != -2) + return WC_TEST_RET_ENC_NC; + if (WOLFSSL_ATOMIC_LOAD(a_uint) != 2) + return WC_TEST_RET_ENC_NC; + wolfSSL_Atomic_Int_Init(&a_int, -3); + if (WOLFSSL_ATOMIC_LOAD(a_int) != -3) + return WC_TEST_RET_ENC_NC; + wolfSSL_Atomic_Uint_Init(&a_uint, 3); + if (WOLFSSL_ATOMIC_LOAD(a_uint) != 3) + return WC_TEST_RET_ENC_NC; + WOLFSSL_ATOMIC_STORE(a_int, -4); + if (WOLFSSL_ATOMIC_LOAD(a_int) != -4) + return WC_TEST_RET_ENC_NC; + WOLFSSL_ATOMIC_STORE(a_uint, 4); + if (WOLFSSL_ATOMIC_LOAD(a_uint) != 4) + return WC_TEST_RET_ENC_NC; + + if (wolfSSL_Atomic_Int_FetchAdd(&a_int, 2) != -4) + return WC_TEST_RET_ENC_NC; + if (WOLFSSL_ATOMIC_LOAD(a_int) != -2) + return WC_TEST_RET_ENC_NC; + if (wolfSSL_Atomic_Uint_FetchAdd(&a_uint, 2) != 4) + return WC_TEST_RET_ENC_NC; + if (WOLFSSL_ATOMIC_LOAD(a_uint) != 6) + return WC_TEST_RET_ENC_NC; + if (wolfSSL_Atomic_Int_FetchSub(&a_int, 2) != -2) + return WC_TEST_RET_ENC_NC; + if (WOLFSSL_ATOMIC_LOAD(a_int) != -4) + return WC_TEST_RET_ENC_NC; + if (wolfSSL_Atomic_Uint_FetchSub(&a_uint, 2) != 6) + return WC_TEST_RET_ENC_NC; + if (WOLFSSL_ATOMIC_LOAD(a_uint) != 4) + return WC_TEST_RET_ENC_NC; + + if (wolfSSL_Atomic_Int_AddFetch(&a_int, 2) != -2) + return WC_TEST_RET_ENC_NC; + if (WOLFSSL_ATOMIC_LOAD(a_int) != -2) + return WC_TEST_RET_ENC_NC; + if (wolfSSL_Atomic_Uint_AddFetch(&a_uint, 2) != 6) + return WC_TEST_RET_ENC_NC; + if (WOLFSSL_ATOMIC_LOAD(a_uint) != 6) + return WC_TEST_RET_ENC_NC; + if (wolfSSL_Atomic_Int_SubFetch(&a_int, 2) != -4) + return WC_TEST_RET_ENC_NC; + if (WOLFSSL_ATOMIC_LOAD(a_int) != -4) + return WC_TEST_RET_ENC_NC; + if (wolfSSL_Atomic_Uint_SubFetch(&a_uint, 2) != 4) + return WC_TEST_RET_ENC_NC; + if (WOLFSSL_ATOMIC_LOAD(a_uint) != 4) + return WC_TEST_RET_ENC_NC; + + int_expected = -5; + if (wolfSSL_Atomic_Int_CompareExchange(&a_int, &int_expected, -7)) + return WC_TEST_RET_ENC_NC; + if (int_expected != -4) + return WC_TEST_RET_ENC_NC; + if (! 
wolfSSL_Atomic_Int_CompareExchange(&a_int, &int_expected, -7)) + return WC_TEST_RET_ENC_NC; + if (WOLFSSL_ATOMIC_LOAD(a_int) != -7) + return WC_TEST_RET_ENC_NC; + uint_expected = 5; + if (wolfSSL_Atomic_Uint_CompareExchange(&a_uint, &uint_expected, 7)) + return WC_TEST_RET_ENC_NC; + if (uint_expected != 4) + return WC_TEST_RET_ENC_NC; + if (! wolfSSL_Atomic_Uint_CompareExchange(&a_uint, &uint_expected, 7)) + return WC_TEST_RET_ENC_NC; + if (WOLFSSL_ATOMIC_LOAD(a_uint) != 7) + return WC_TEST_RET_ENC_NC; } return ret; diff --git a/wolfssl/wolfcrypt/cpuid.h b/wolfssl/wolfcrypt/cpuid.h index e6b7eb667..56bc61401 100644 --- a/wolfssl/wolfcrypt/cpuid.h +++ b/wolfssl/wolfcrypt/cpuid.h @@ -44,6 +44,16 @@ #define HAVE_CPUID_AARCH64 #endif +#define WC_CPUID_UNINITED_VAL 0xffffffffU +#if !defined(WOLFSSL_NO_ATOMICS) && !defined(SINGLE_THREADED) + typedef wolfSSL_Atomic_Uint cpuid_flags_t; + #define WC_CPUID_INITIALIZER \ + WOLFSSL_ATOMIC_INITIALIZER(WC_CPUID_UNINITED_VAL) +#else + typedef word32 cpuid_flags_t; + #define WC_CPUID_INITIALIZER WC_CPUID_UNINITED_VAL +#endif + #ifdef HAVE_CPUID_INTEL #define CPUID_AVX1 0x0001 @@ -57,16 +67,16 @@ #define CPUID_BMI1 0x0100 /* ANDN */ #define CPUID_SHA 0x0200 /* SHA-1 and SHA-256 instructions */ - #define IS_INTEL_AVX1(f) ((f) & CPUID_AVX1) - #define IS_INTEL_AVX2(f) ((f) & CPUID_AVX2) - #define IS_INTEL_RDRAND(f) ((f) & CPUID_RDRAND) - #define IS_INTEL_RDSEED(f) ((f) & CPUID_RDSEED) - #define IS_INTEL_BMI2(f) ((f) & CPUID_BMI2) - #define IS_INTEL_AESNI(f) ((f) & CPUID_AESNI) - #define IS_INTEL_ADX(f) ((f) & CPUID_ADX) - #define IS_INTEL_MOVBE(f) ((f) & CPUID_MOVBE) - #define IS_INTEL_BMI1(f) ((f) & CPUID_BMI1) - #define IS_INTEL_SHA(f) ((f) & CPUID_SHA) + #define IS_INTEL_AVX1(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_AVX1) + #define IS_INTEL_AVX2(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_AVX2) + #define IS_INTEL_RDRAND(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_RDRAND) + #define IS_INTEL_RDSEED(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_RDSEED) + #define IS_INTEL_BMI2(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_BMI2) + #define IS_INTEL_AESNI(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_AESNI) + #define IS_INTEL_ADX(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_ADX) + #define IS_INTEL_MOVBE(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_MOVBE) + #define IS_INTEL_BMI1(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_BMI1) + #define IS_INTEL_SHA(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_SHA) #elif defined(HAVE_CPUID_AARCH64) @@ -80,23 +90,32 @@ #define CPUID_SM4 0x0080 /* SM4 enc/dec */ #define CPUID_SB 0x0100 /* Speculation barrier */ - #define IS_AARCH64_AES(f) ((f) & CPUID_AES) - #define IS_AARCH64_PMULL(f) ((f) & CPUID_PMULL) - #define IS_AARCH64_SHA256(f) ((f) & CPUID_SHA256) - #define IS_AARCH64_SHA512(f) ((f) & CPUID_SHA512) - #define IS_AARCH64_RDM(f) ((f) & CPUID_RDM) - #define IS_AARCH64_SHA3(f) ((f) & CPUID_SHA3) - #define IS_AARCH64_SM3(f) ((f) & CPUID_SM3) - #define IS_AARCH64_SM4(f) ((f) & CPUID_SM4) - #define IS_AARCH64_SB(f) ((f) & CPUID_SB) + #define IS_AARCH64_AES(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_AES) + #define IS_AARCH64_PMULL(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_PMULL) + #define IS_AARCH64_SHA256(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_SHA256) + #define IS_AARCH64_SHA512(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_SHA512) + #define IS_AARCH64_RDM(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_RDM) + #define IS_AARCH64_SHA3(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_SHA3) + #define IS_AARCH64_SM3(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_SM3) + #define 
IS_AARCH64_SM4(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_SM4) + #define IS_AARCH64_SB(f) (WOLFSSL_ATOMIC_COERCE_UINT(f) & CPUID_SB) #endif #ifdef HAVE_CPUID - void cpuid_set_flags(void); word32 cpuid_get_flags(void); - /* Public APIs to modify flags. */ + static WC_INLINE int cpuid_get_flags_ex(cpuid_flags_t *flags) { + if (WOLFSSL_ATOMIC_LOAD(*flags) == WC_CPUID_UNINITED_VAL) { + word32 old_cpuid_flags = WC_CPUID_UNINITED_VAL; + return wolfSSL_Atomic_Uint_CompareExchange + (flags, &old_cpuid_flags, cpuid_get_flags()); + } + else + return 0; + } + + /* Public APIs to modify flags -- note that these are not threadsafe. */ WOLFSSL_API void cpuid_select_flags(word32 flags); WOLFSSL_API void cpuid_set_flag(word32 flag); WOLFSSL_API void cpuid_clear_flag(word32 flag); diff --git a/wolfssl/wolfcrypt/wc_port.h b/wolfssl/wolfcrypt/wc_port.h index 9ccfca6cf..634f2e17a 100644 --- a/wolfssl/wolfcrypt/wc_port.h +++ b/wolfssl/wolfcrypt/wc_port.h @@ -466,62 +466,109 @@ #endif #ifndef WOLFSSL_NO_ATOMICS -#ifdef SINGLE_THREADED - typedef int wolfSSL_Atomic_Int; - #define WOLFSSL_ATOMIC_INITIALIZER(x) (x) - #define WOLFSSL_ATOMIC_LOAD(x) (x) - #define WOLFSSL_ATOMIC_STORE(x, val) (x) = (val) - #define WOLFSSL_ATOMIC_OPS -#elif defined(HAVE_C___ATOMIC) -#ifdef __cplusplus -#if defined(__GNUC__) && defined(__ATOMIC_RELAXED) - /* C++ using direct calls to compiler built-in functions */ - typedef volatile int wolfSSL_Atomic_Int; - #define WOLFSSL_ATOMIC_INITIALIZER(x) (x) - #define WOLFSSL_ATOMIC_LOAD(x) __atomic_load_n(&(x), __ATOMIC_CONSUME) - #define WOLFSSL_ATOMIC_STORE(x, val) __atomic_store_n(&(x), val, __ATOMIC_RELEASE) - #define WOLFSSL_ATOMIC_OPS -#endif -#else - #ifdef WOLFSSL_HAVE_ATOMIC_H - /* Default C Implementation */ - #include - typedef atomic_int wolfSSL_Atomic_Int; - #define WOLFSSL_ATOMIC_INITIALIZER(x) (x) - #define WOLFSSL_ATOMIC_LOAD(x) atomic_load(&(x)) - #define WOLFSSL_ATOMIC_STORE(x, val) atomic_store(&(x), val) - #define WOLFSSL_ATOMIC_OPS - #endif /* WOLFSSL_HAVE_ATOMIC_H */ -#endif -#elif defined(_MSC_VER) && !defined(WOLFSSL_NOT_WINDOWS_API) - /* Use MSVC compiler intrinsics for atomic ops */ - #ifdef _WIN32_WCE - #include - #else - #include + #ifdef SINGLE_THREADED + typedef int wolfSSL_Atomic_Int; + typedef unsigned int wolfSSL_Atomic_Uint; + #define WOLFSSL_ATOMIC_INITIALIZER(x) (x) + #define WOLFSSL_ATOMIC_LOAD(x) (x) + #define WOLFSSL_ATOMIC_COERCE_INT(x) ((int)(x)) + #define WOLFSSL_ATOMIC_COERCE_UINT(x) ((unsigned int)(x)) + #define WOLFSSL_ATOMIC_STORE(x, val) (x) = (val) + #define WOLFSSL_ATOMIC_OPS + #elif defined(HAVE_C___ATOMIC) + #ifdef __cplusplus + #if defined(__GNUC__) && defined(__ATOMIC_RELAXED) + /* C++ using direct calls to compiler built-in functions */ + typedef volatile int wolfSSL_Atomic_Int; + typedef volatile unsigned int wolfSSL_Atomic_Uint; + #define WOLFSSL_ATOMIC_INITIALIZER(x) (x) + #define WOLFSSL_ATOMIC_LOAD(x) __atomic_load_n(&(x), \ + __ATOMIC_CONSUME) + #define WOLFSSL_ATOMIC_COERCE_INT(x) ((int)(x)) + #define WOLFSSL_ATOMIC_COERCE_UINT(x) ((unsigned int)(x)) + #define WOLFSSL_ATOMIC_STORE(x, val) __atomic_store_n(&(x), \ + val, __ATOMIC_RELEASE) + #define WOLFSSL_ATOMIC_OPS + #endif + #else + #ifdef WOLFSSL_HAVE_ATOMIC_H + /* Default C Implementation */ + #include + typedef atomic_int wolfSSL_Atomic_Int; + typedef atomic_uint wolfSSL_Atomic_Uint; + #define WOLFSSL_ATOMIC_INITIALIZER(x) (x) + #define WOLFSSL_ATOMIC_LOAD(x) atomic_load(&(x)) + #define WOLFSSL_ATOMIC_COERCE_INT(x) ((int)(x)) + #define WOLFSSL_ATOMIC_COERCE_UINT(x) ((unsigned 
int)(x)) + #define WOLFSSL_ATOMIC_STORE(x, val) atomic_store(&(x), val) + #define WOLFSSL_ATOMIC_OPS + #endif /* WOLFSSL_HAVE_ATOMIC_H */ + #endif + #elif defined(_MSC_VER) && !defined(WOLFSSL_NOT_WINDOWS_API) + /* Use MSVC compiler intrinsics for atomic ops */ + #ifdef _WIN32_WCE + #include + #else + #include + #endif + typedef volatile long wolfSSL_Atomic_Int; + typedef volatile unsigned long wolfSSL_Atomic_Uint; + #define WOLFSSL_ATOMIC_INITIALIZER(x) (x) + #define WOLFSSL_ATOMIC_LOAD(x) (x) + #define WOLFSSL_ATOMIC_COERCE_INT(x) ((int)(x)) + #define WOLFSSL_ATOMIC_COERCE_UINT(x) ((unsigned int)(x)) + #define WOLFSSL_ATOMIC_STORE(x, val) (x) = (val) + #define WOLFSSL_ATOMIC_OPS #endif - typedef volatile long wolfSSL_Atomic_Int; + + #ifndef WOLFSSL_ATOMIC_INITIALIZER + /* If we weren't able to implement atomics above, disable them here. */ + #define WOLFSSL_NO_ATOMICS + #endif +#endif + +#ifdef WOLFSSL_NO_ATOMICS #define WOLFSSL_ATOMIC_INITIALIZER(x) (x) #define WOLFSSL_ATOMIC_LOAD(x) (x) + #define WOLFSSL_ATOMIC_COERCE_INT(x) ((int)(x)) + #define WOLFSSL_ATOMIC_COERCE_UINT(x) ((unsigned int)(x)) #define WOLFSSL_ATOMIC_STORE(x, val) (x) = (val) - #define WOLFSSL_ATOMIC_OPS -#endif #endif /* WOLFSSL_NO_ATOMICS */ #if defined(WOLFSSL_ATOMIC_OPS) && !defined(SINGLE_THREADED) WOLFSSL_API void wolfSSL_Atomic_Int_Init(wolfSSL_Atomic_Int* c, int i); + WOLFSSL_API void wolfSSL_Atomic_Uint_Init( + wolfSSL_Atomic_Uint* c, unsigned int i); /* Fetch* functions return the value of the counter immediately preceding - * the effects of the function. */ + * the effects of the operation. + * *Fetch functions return the value of the counter immediately after + * the effects of the operation. + */ WOLFSSL_API int wolfSSL_Atomic_Int_FetchAdd(wolfSSL_Atomic_Int* c, int i); WOLFSSL_API int wolfSSL_Atomic_Int_FetchSub(wolfSSL_Atomic_Int* c, int i); + WOLFSSL_API int wolfSSL_Atomic_Int_AddFetch(wolfSSL_Atomic_Int* c, int i); + WOLFSSL_API int wolfSSL_Atomic_Int_SubFetch(wolfSSL_Atomic_Int* c, int i); + WOLFSSL_API int wolfSSL_Atomic_Int_CompareExchange( + wolfSSL_Atomic_Int* c, int *expected_i, int new_i); + WOLFSSL_API unsigned int wolfSSL_Atomic_Uint_FetchAdd( + wolfSSL_Atomic_Uint* c, unsigned int i); + WOLFSSL_API unsigned int wolfSSL_Atomic_Uint_FetchSub( + wolfSSL_Atomic_Uint* c, unsigned int i); + WOLFSSL_API unsigned int wolfSSL_Atomic_Uint_AddFetch( + wolfSSL_Atomic_Uint* c, unsigned int i); + WOLFSSL_API unsigned int wolfSSL_Atomic_Uint_SubFetch( + wolfSSL_Atomic_Uint* c, unsigned int i); + WOLFSSL_API int wolfSSL_Atomic_Uint_CompareExchange( + wolfSSL_Atomic_Uint* c, unsigned int *expected_i, unsigned int new_i); #else /* Code using these fallback implementations in non-SINGLE_THREADED builds - * needs to arrange its own explicit fallback to int for wolfSSL_Atomic_Int, - * which is not defined if !defined(WOLFSSL_ATOMIC_OPS) && - * !defined(SINGLE_THREADED). This forces local awareness of thread-unsafe - * semantics. + * needs to arrange its own explicit fallback to int for wolfSSL_Atomic_Int + * and unsigned int for wolfSSL_Atomic_Uint, which is not defined if + * !defined(WOLFSSL_ATOMIC_OPS) && !defined(SINGLE_THREADED). This forces + * local awareness of thread-unsafe semantics. 
*/ #define wolfSSL_Atomic_Int_Init(c, i) (*(c) = (i)) + #define wolfSSL_Atomic_Uint_Init(c, i) (*(c) = (i)) static WC_INLINE int wolfSSL_Atomic_Int_FetchAdd(int *c, int i) { int ret = *c; *c += i; @@ -532,6 +579,60 @@ *c -= i; return ret; } + static WC_INLINE int wolfSSL_Atomic_Int_AddFetch(int *c, int i) { + return (*c += i); + } + static WC_INLINE int wolfSSL_Atomic_Int_SubFetch(int *c, int i) { + return (*c -= i); + } + static WC_INLINE int wolfSSL_Atomic_Int_CompareExchange( + int *c, int *expected_i, int new_i) + { + if (*c == *expected_i) { + *c = new_i; + return 1; + } + else { + *expected_i = *c; + return 0; + } + } + static WC_INLINE unsigned int wolfSSL_Atomic_Uint_FetchAdd( + unsigned int *c, unsigned int i) + { + unsigned int ret = *c; + *c += i; + return ret; + } + static WC_INLINE unsigned int wolfSSL_Atomic_Uint_FetchSub( + unsigned int *c, unsigned int i) + { + unsigned int ret = *c; + *c -= i; + return ret; + } + static WC_INLINE unsigned int wolfSSL_Atomic_Uint_AddFetch( + unsigned int *c, unsigned int i) + { + return (*c += i); + } + static WC_INLINE unsigned int wolfSSL_Atomic_Uint_SubFetch( + unsigned int *c, unsigned int i) + { + return (*c -= i); + } + static WC_INLINE int wolfSSL_Atomic_Uint_CompareExchange( + unsigned int *c, unsigned int *expected_i, unsigned int new_i) + { + if (*c == *expected_i) { + *c = new_i; + return 1; + } + else { + *expected_i = *c; + return 0; + } + } #endif /* Reference counting. */ From cefeb4cd7e75698c524f423d74b5a8e92d99bd8c Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Thu, 14 Aug 2025 09:33:14 -0500 Subject: [PATCH 2/2] atomics/cpuid_flags fixes from peer review: wolfcrypt/src/cpuid.c: cpuid_set_flag() and cpuid_clear_flag() thread safety; wolfcrypt/src/wc_port.c: comments re __ATOMIC_SEQ_CST and __ATOMIC_ACQUIRE; wolfssl/wolfcrypt/wc_port.h: single overrideable definitions for WOLFSSL_ATOMIC_COERCE_[U]INT(), and comment cleanup. also added WOLFSSL_USER_DEFINED_ATOMICS. --- .wolfssl_known_macro_extras | 1 + wolfcrypt/src/cpuid.c | 12 +++++++---- wolfcrypt/src/wc_port.c | 24 +++++++++++++++++++++ wolfssl/wolfcrypt/cpuid.h | 2 +- wolfssl/wolfcrypt/wc_port.h | 42 ++++++++++++++++++++++++------------- 5 files changed, 62 insertions(+), 19 deletions(-) diff --git a/.wolfssl_known_macro_extras b/.wolfssl_known_macro_extras index 0b2d773a3..8eff27a88 100644 --- a/.wolfssl_known_macro_extras +++ b/.wolfssl_known_macro_extras @@ -877,6 +877,7 @@ WOLFSSL_TLSX_PQC_MLKEM_STORE_PRIV_KEY WOLFSSL_TRACK_MEMORY_FULL WOLFSSL_TRAP_MALLOC_SZ WOLFSSL_UNALIGNED_64BIT_ACCESS +WOLFSSL_USER_DEFINED_ATOMICS WOLFSSL_USER_FILESYSTEM WOLFSSL_USER_LOG WOLFSSL_USER_MUTEX diff --git a/wolfcrypt/src/cpuid.c b/wolfcrypt/src/cpuid.c index 4b1882dc3..0250911db 100644 --- a/wolfcrypt/src/cpuid.c +++ b/wolfcrypt/src/cpuid.c @@ -355,14 +355,18 @@ void cpuid_set_flag(word32 flag) { - WOLFSSL_ATOMIC_STORE - (cpuid_flags, WOLFSSL_ATOMIC_LOAD(cpuid_flags) | flag); + word32 current_flags = WOLFSSL_ATOMIC_LOAD(cpuid_flags); + while (! wolfSSL_Atomic_Uint_CompareExchange + (&cpuid_flags, ¤t_flags, current_flags | flag)) + WC_RELAX_LONG_LOOP(); } void cpuid_clear_flag(word32 flag) { - WOLFSSL_ATOMIC_STORE - (cpuid_flags, WOLFSSL_ATOMIC_LOAD(cpuid_flags) & ~flag); + word32 current_flags = WOLFSSL_ATOMIC_LOAD(cpuid_flags); + while (! 
wolfSSL_Atomic_Uint_CompareExchange + (&cpuid_flags, ¤t_flags, current_flags & ~flag)) + WC_RELAX_LONG_LOOP(); } #endif /* HAVE_CPUID */ diff --git a/wolfcrypt/src/wc_port.c b/wolfcrypt/src/wc_port.c index 76ea14651..fc42f6fdb 100644 --- a/wolfcrypt/src/wc_port.c +++ b/wolfcrypt/src/wc_port.c @@ -1310,6 +1310,12 @@ int wolfSSL_Atomic_Int_SubFetch(wolfSSL_Atomic_Int* c, int i) int wolfSSL_Atomic_Int_CompareExchange(wolfSSL_Atomic_Int* c, int *expected_i, int new_i) { + /* For the success path, use full synchronization with barriers -- + * "Sequentially-consistent ordering" -- so that all threads see the same + * "single total modification order of all atomic operations" -- but on + * failure we just need to be sure we acquire the value that changed out + * from under us. + */ return __atomic_compare_exchange_n(c, expected_i, new_i, 0 /* weak */, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); } @@ -1341,6 +1347,12 @@ unsigned int wolfSSL_Atomic_Uint_SubFetch(wolfSSL_Atomic_Uint* c, int wolfSSL_Atomic_Uint_CompareExchange( wolfSSL_Atomic_Uint* c, unsigned int *expected_i, unsigned int new_i) { + /* For the success path, use full synchronization with barriers -- + * "Sequentially-consistent ordering" -- so that all threads see the same + * "single total modification order of all atomic operations" -- but on + * failure we just need to be sure we acquire the value that changed out + * from under us. + */ return __atomic_compare_exchange_n( c, expected_i, new_i, 0 /* weak */, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE); } @@ -1383,6 +1395,12 @@ int wolfSSL_Atomic_Int_SubFetch(wolfSSL_Atomic_Int* c, int i) int wolfSSL_Atomic_Int_CompareExchange( wolfSSL_Atomic_Int* c, int *expected_i, int new_i) { + /* For the success path, use full synchronization with barriers -- + * "Sequentially-consistent ordering" -- so that all threads see the same + * "single total modification order of all atomic operations" -- but on + * failure we just need to be sure we acquire the value that changed out + * from under us. + */ return atomic_compare_exchange_strong_explicit( c, expected_i, new_i, memory_order_seq_cst, memory_order_acquire); } @@ -1416,6 +1434,12 @@ unsigned int wolfSSL_Atomic_Uint_SubFetch(wolfSSL_Atomic_Uint* c, int wolfSSL_Atomic_Uint_CompareExchange( wolfSSL_Atomic_Uint* c, unsigned int *expected_i, unsigned int new_i) { + /* For the success path, use full synchronization with barriers -- + * "Sequentially-consistent ordering" -- so that all threads see the same + * "single total modification order of all atomic operations" -- but on + * failure we just need to be sure we acquire the value that changed out + * from under us. + */ return atomic_compare_exchange_strong_explicit( c, expected_i, new_i, memory_order_seq_cst, memory_order_acquire); } diff --git a/wolfssl/wolfcrypt/cpuid.h b/wolfssl/wolfcrypt/cpuid.h index 56bc61401..3ba3405b1 100644 --- a/wolfssl/wolfcrypt/cpuid.h +++ b/wolfssl/wolfcrypt/cpuid.h @@ -115,7 +115,7 @@ return 0; } - /* Public APIs to modify flags -- note that these are not threadsafe. */ + /* Public APIs to modify flags. 
*/ WOLFSSL_API void cpuid_select_flags(word32 flags); WOLFSSL_API void cpuid_set_flag(word32 flag); WOLFSSL_API void cpuid_clear_flag(word32 flag); diff --git a/wolfssl/wolfcrypt/wc_port.h b/wolfssl/wolfcrypt/wc_port.h index 634f2e17a..b30a499f5 100644 --- a/wolfssl/wolfcrypt/wc_port.h +++ b/wolfssl/wolfcrypt/wc_port.h @@ -466,13 +466,20 @@ #endif #ifndef WOLFSSL_NO_ATOMICS - #ifdef SINGLE_THREADED + #if defined(WOLFSSL_USER_DEFINED_ATOMICS) + /* user-supplied bindings for wolfSSL_Atomic_Int etc. */ + #if !defined(WOLFSSL_ATOMIC_INITIALIZER) || \ + !defined(WOLFSSL_ATOMIC_LOAD) || \ + !defined(WOLFSSL_ATOMIC_STORE) + #error WOLFSSL_USER_DEFINED_ATOMICS is set but macro(s) are missing. + #else + #define WOLFSSL_ATOMIC_OPS + #endif + #elif defined(SINGLE_THREADED) typedef int wolfSSL_Atomic_Int; typedef unsigned int wolfSSL_Atomic_Uint; #define WOLFSSL_ATOMIC_INITIALIZER(x) (x) #define WOLFSSL_ATOMIC_LOAD(x) (x) - #define WOLFSSL_ATOMIC_COERCE_INT(x) ((int)(x)) - #define WOLFSSL_ATOMIC_COERCE_UINT(x) ((unsigned int)(x)) #define WOLFSSL_ATOMIC_STORE(x, val) (x) = (val) #define WOLFSSL_ATOMIC_OPS #elif defined(HAVE_C___ATOMIC) @@ -484,8 +491,6 @@ #define WOLFSSL_ATOMIC_INITIALIZER(x) (x) #define WOLFSSL_ATOMIC_LOAD(x) __atomic_load_n(&(x), \ __ATOMIC_CONSUME) - #define WOLFSSL_ATOMIC_COERCE_INT(x) ((int)(x)) - #define WOLFSSL_ATOMIC_COERCE_UINT(x) ((unsigned int)(x)) #define WOLFSSL_ATOMIC_STORE(x, val) __atomic_store_n(&(x), \ val, __ATOMIC_RELEASE) #define WOLFSSL_ATOMIC_OPS @@ -498,8 +503,6 @@ typedef atomic_uint wolfSSL_Atomic_Uint; #define WOLFSSL_ATOMIC_INITIALIZER(x) (x) #define WOLFSSL_ATOMIC_LOAD(x) atomic_load(&(x)) - #define WOLFSSL_ATOMIC_COERCE_INT(x) ((int)(x)) - #define WOLFSSL_ATOMIC_COERCE_UINT(x) ((unsigned int)(x)) #define WOLFSSL_ATOMIC_STORE(x, val) atomic_store(&(x), val) #define WOLFSSL_ATOMIC_OPS #endif /* WOLFSSL_HAVE_ATOMIC_H */ @@ -515,8 +518,6 @@ typedef volatile unsigned long wolfSSL_Atomic_Uint; #define WOLFSSL_ATOMIC_INITIALIZER(x) (x) #define WOLFSSL_ATOMIC_LOAD(x) (x) - #define WOLFSSL_ATOMIC_COERCE_INT(x) ((int)(x)) - #define WOLFSSL_ATOMIC_COERCE_UINT(x) ((unsigned int)(x)) #define WOLFSSL_ATOMIC_STORE(x, val) (x) = (val) #define WOLFSSL_ATOMIC_OPS #endif @@ -530,18 +531,31 @@ #ifdef WOLFSSL_NO_ATOMICS #define WOLFSSL_ATOMIC_INITIALIZER(x) (x) #define WOLFSSL_ATOMIC_LOAD(x) (x) - #define WOLFSSL_ATOMIC_COERCE_INT(x) ((int)(x)) - #define WOLFSSL_ATOMIC_COERCE_UINT(x) ((unsigned int)(x)) #define WOLFSSL_ATOMIC_STORE(x, val) (x) = (val) #endif /* WOLFSSL_NO_ATOMICS */ -#if defined(WOLFSSL_ATOMIC_OPS) && !defined(SINGLE_THREADED) +/* WOLFSSL_ATOMIC_COERCE_INT() needs to accept either a regular int or an + * wolfSSL_Atomic_Int as its argument, and evaluate to a regular int. + * Allows a user-supplied override definition with type introspection. + */ +#ifndef WOLFSSL_ATOMIC_COERCE_INT + #define WOLFSSL_ATOMIC_COERCE_INT(x) ((int)(x)) +#endif +#ifndef WOLFSSL_ATOMIC_COERCE_UINT + #define WOLFSSL_ATOMIC_COERCE_UINT(x) ((unsigned int)(x)) +#endif + +#ifdef WOLFSSL_USER_DEFINED_ATOMICS + /* user-supplied bindings for wolfSSL_Atomic_Int_Init(), + * wolfSSL_Atomic_Int_FetchAdd(), etc. 
+ */ +#elif defined(WOLFSSL_ATOMIC_OPS) && !defined(SINGLE_THREADED) WOLFSSL_API void wolfSSL_Atomic_Int_Init(wolfSSL_Atomic_Int* c, int i); WOLFSSL_API void wolfSSL_Atomic_Uint_Init( wolfSSL_Atomic_Uint* c, unsigned int i); - /* Fetch* functions return the value of the counter immediately preceding + /* FetchOp functions return the value of the counter immediately preceding * the effects of the operation. - * *Fetch functions return the value of the counter immediately after + * OpFetch functions return the value of the counter immediately after * the effects of the operation. */ WOLFSSL_API int wolfSSL_Atomic_Int_FetchAdd(wolfSSL_Atomic_Int* c, int i);
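The compare-exchange contract the series relies on (and which the reworked cpuid_set_flag()/cpuid_clear_flag() above exercise in a retry loop) can be summarized with a small sketch. The function and macro names are from the patch; example_set_bits() is hypothetical. On success the call returns nonzero; on failure it returns 0 and writes the value it actually observed into *expected, so the caller retries with fresh data. The real cpuid_set_flag() additionally calls WC_RELAX_LONG_LOOP() between attempts.

    static void example_set_bits(wolfSSL_Atomic_Uint *flags, unsigned int bits)
    {
        unsigned int cur = WOLFSSL_ATOMIC_LOAD(*flags);
        while (!wolfSSL_Atomic_Uint_CompareExchange(flags, &cur, cur | bits)) {
            /* Lost the race: cur now holds the other thread's value, so the
             * next iteration ORs the bits into that fresh value. */
        }
    }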
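The FetchOp/OpFetch naming that memory_test() now checks follows the convention documented in wc_port.h: FetchAdd()/FetchSub() return the counter value immediately before the operation, while AddFetch()/SubFetch() return the value immediately after it. A one-function sketch with illustrative names only:

    static void example_fetch_vs_op(void)
    {
        wolfSSL_Atomic_Uint u = WOLFSSL_ATOMIC_INITIALIZER(2);
        unsigned int before = wolfSSL_Atomic_Uint_FetchAdd(&u, 2); /* == 2 */
        unsigned int after  = wolfSSL_Atomic_Uint_AddFetch(&u, 2); /* == 6 */
        (void)before;
        (void)after;
        /* WOLFSSL_ATOMIC_LOAD(u) is now 6. */
    }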
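WOLFSSL_USER_DEFINED_ATOMICS, added in the second patch, lets a port supply its own atomics bindings. The #error check only verifies that WOLFSSL_ATOMIC_INITIALIZER, WOLFSSL_ATOMIC_LOAD, and WOLFSSL_ATOMIC_STORE are defined, but per the accompanying comments the user is also expected to provide the wolfSSL_Atomic_Int/wolfSSL_Atomic_Uint types and the wolfSSL_Atomic_*() function bindings. The fragment below is an assumption-laden illustration of what a user_settings.h could supply, mapped onto C11 <stdatomic.h> purely for illustration; it is not part of the patch.

    #define WOLFSSL_USER_DEFINED_ATOMICS
    #include <stdatomic.h>
    typedef atomic_int  wolfSSL_Atomic_Int;   /* user-supplied type bindings */
    typedef atomic_uint wolfSSL_Atomic_Uint;
    #define WOLFSSL_ATOMIC_INITIALIZER(x) (x)
    #define WOLFSSL_ATOMIC_LOAD(x)        atomic_load(&(x))
    #define WOLFSSL_ATOMIC_STORE(x, val)  atomic_store(&(x), (val))
    /* ...plus user definitions (functions or macros) matching the
     * wolfSSL_Atomic_Int_Init(), wolfSSL_Atomic_Uint_CompareExchange(),
     * etc. prototypes and semantics declared in wc_port.h. */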