Merge pull request #9097 from douzzer/20250812-atomic-cmpxchg

20250812-atomic-cmpxchg
This commit is contained in:
Juliusz Sosinowicz
2025-08-15 01:14:45 +02:00
committed by GitHub
19 changed files with 656 additions and 231 deletions

View File

@@ -624,11 +624,11 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
*/
static int checkedAESNI = 0;
static int haveAESNI = 0;
static word32 intel_flags = 0;
static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER;
static WARN_UNUSED_RESULT int Check_CPU_support_AES(void)
{
intel_flags = cpuid_get_flags();
cpuid_get_flags_ex(&intel_flags);
return IS_INTEL_AESNI(intel_flags) != 0;
}
@@ -786,15 +786,11 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
#define NEED_AES_TABLES
static int checkedCpuIdFlags = 0;
static word32 cpuid_flags = 0;
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
static void Check_CPU_support_HwCrypto(Aes* aes)
{
if (checkedCpuIdFlags == 0) {
cpuid_flags = cpuid_get_flags();
checkedCpuIdFlags = 1;
}
cpuid_get_flags_ex(&cpuid_flags);
aes->use_aes_hw_crypto = IS_AARCH64_AES(cpuid_flags);
#ifdef HAVE_AESGCM
aes->use_pmull_hw_crypto = IS_AARCH64_PMULL(cpuid_flags);

View File

@@ -109,8 +109,7 @@ Public domain.
#define HAVE_INTEL_AVX2
#endif
static int cpuidFlagsSet = 0;
static word32 cpuidFlags = 0;
static cpuid_flags_t cpuidFlags = WC_CPUID_INITIALIZER;
#endif
/**
@@ -332,10 +331,7 @@ int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
return 0;
}
if (!cpuidFlagsSet) {
cpuidFlags = cpuid_get_flags();
cpuidFlagsSet = 1;
}
cpuid_get_flags_ex(&cpuidFlags);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_AVX2(cpuidFlags)) {

View File

@@ -25,8 +25,7 @@
#if defined(HAVE_CPUID) || defined(HAVE_CPUID_INTEL) || \
defined(HAVE_CPUID_AARCH64)
static word32 cpuid_check = 0;
static word32 cpuid_flags = 0;
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
#endif
#ifdef HAVE_CPUID_INTEL
@@ -81,21 +80,22 @@
}
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
if (cpuid_flag(1, 0, ECX, 28)) { cpuid_flags |= CPUID_AVX1 ; }
if (cpuid_flag(7, 0, EBX, 5)) { cpuid_flags |= CPUID_AVX2 ; }
if (cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; }
if (cpuid_flag(1, 0, ECX, 30)) { cpuid_flags |= CPUID_RDRAND; }
if (cpuid_flag(7, 0, EBX, 18)) { cpuid_flags |= CPUID_RDSEED; }
if (cpuid_flag(1, 0, ECX, 25)) { cpuid_flags |= CPUID_AESNI ; }
if (cpuid_flag(7, 0, EBX, 19)) { cpuid_flags |= CPUID_ADX ; }
if (cpuid_flag(1, 0, ECX, 22)) { cpuid_flags |= CPUID_MOVBE ; }
if (cpuid_flag(7, 0, EBX, 3)) { cpuid_flags |= CPUID_BMI1 ; }
if (cpuid_flag(7, 0, EBX, 29)) { cpuid_flags |= CPUID_SHA ; }
cpuid_check = 1;
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
if (cpuid_flag(1, 0, ECX, 28)) { new_cpuid_flags |= CPUID_AVX1 ; }
if (cpuid_flag(7, 0, EBX, 5)) { new_cpuid_flags |= CPUID_AVX2 ; }
if (cpuid_flag(7, 0, EBX, 8)) { new_cpuid_flags |= CPUID_BMI2 ; }
if (cpuid_flag(1, 0, ECX, 30)) { new_cpuid_flags |= CPUID_RDRAND; }
if (cpuid_flag(7, 0, EBX, 18)) { new_cpuid_flags |= CPUID_RDSEED; }
if (cpuid_flag(1, 0, ECX, 25)) { new_cpuid_flags |= CPUID_AESNI ; }
if (cpuid_flag(7, 0, EBX, 19)) { new_cpuid_flags |= CPUID_ADX ; }
if (cpuid_flag(1, 0, ECX, 22)) { new_cpuid_flags |= CPUID_MOVBE ; }
if (cpuid_flag(7, 0, EBX, 3)) { new_cpuid_flags |= CPUID_BMI1 ; }
if (cpuid_flag(7, 0, EBX, 29)) { new_cpuid_flags |= CPUID_SHA ; }
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#elif defined(HAVE_CPUID_AARCH64)
@@ -113,9 +113,10 @@
/* https://developer.arm.com/documentation/ddi0601/2024-09/AArch64-Registers
* /ID-AA64ISAR0-EL1--AArch64-Instruction-Set-Attribute-Register-0 */
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
word64 features;
__asm__ __volatile (
@@ -126,25 +127,26 @@
);
if (features & CPUID_AARCH64_FEAT_AES)
cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_AES;
if (features & CPUID_AARCH64_FEAT_AES_PMULL) {
cpuid_flags |= CPUID_AES;
cpuid_flags |= CPUID_PMULL;
new_cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_PMULL;
}
if (features & CPUID_AARCH64_FEAT_SHA256)
cpuid_flags |= CPUID_SHA256;
new_cpuid_flags |= CPUID_SHA256;
if (features & CPUID_AARCH64_FEAT_SHA256_512)
cpuid_flags |= CPUID_SHA256 | CPUID_SHA512;
new_cpuid_flags |= CPUID_SHA256 | CPUID_SHA512;
if (features & CPUID_AARCH64_FEAT_RDM)
cpuid_flags |= CPUID_RDM;
new_cpuid_flags |= CPUID_RDM;
if (features & CPUID_AARCH64_FEAT_SHA3)
cpuid_flags |= CPUID_SHA3;
new_cpuid_flags |= CPUID_SHA3;
if (features & CPUID_AARCH64_FEAT_SM3)
cpuid_flags |= CPUID_SM3;
new_cpuid_flags |= CPUID_SM3;
if (features & CPUID_AARCH64_FEAT_SM4)
cpuid_flags |= CPUID_SM4;
new_cpuid_flags |= CPUID_SM4;
cpuid_check = 1;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#elif defined(__linux__)
@@ -154,42 +156,44 @@
#include <sys/auxv.h>
#include <asm/hwcap.h>
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
word64 hwcaps = getauxval(AT_HWCAP);
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
if (hwcaps & HWCAP_AES)
cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_AES;
if (hwcaps & HWCAP_PMULL)
cpuid_flags |= CPUID_PMULL;
new_cpuid_flags |= CPUID_PMULL;
if (hwcaps & HWCAP_SHA2)
cpuid_flags |= CPUID_SHA256;
new_cpuid_flags |= CPUID_SHA256;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
if (hwcaps & HWCAP_SHA512)
cpuid_flags |= CPUID_SHA512;
new_cpuid_flags |= CPUID_SHA512;
#endif
#if defined(HWCAP_ASIMDRDM) && !defined(WOLFSSL_AARCH64_NO_SQRDMLSH)
if (hwcaps & HWCAP_ASIMDRDM)
cpuid_flags |= CPUID_RDM;
new_cpuid_flags |= CPUID_RDM;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
if (hwcaps & HWCAP_SHA3)
cpuid_flags |= CPUID_SHA3;
new_cpuid_flags |= CPUID_SHA3;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SM3
if (hwcaps & HWCAP_SM3)
cpuid_flags |= CPUID_SM3;
new_cpuid_flags |= CPUID_SM3;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SM4
if (hwcaps & HWCAP_SM4)
cpuid_flags |= CPUID_SM4;
new_cpuid_flags |= CPUID_SM4;
#endif
(void)hwcaps;
cpuid_check = 1;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#elif defined(__ANDROID__) || defined(ANDROID)
@@ -198,19 +202,21 @@
#include "cpu-features.h"
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
word64 features = android_getCpuFeatures();
if (features & ANDROID_CPU_ARM_FEATURE_AES)
cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_AES;
if (features & ANDROID_CPU_ARM_FEATURE_PMULL)
cpuid_flags |= CPUID_PMULL;
new_cpuid_flags |= CPUID_PMULL;
if (features & ANDROID_CPU_ARM_FEATURE_SHA2)
cpuid_flags |= CPUID_SHA256;
new_cpuid_flags |= CPUID_SHA256;
cpuid_check = 1;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#elif defined(__APPLE__)
@@ -229,29 +235,31 @@
return ret;
}
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_AES") != 0)
cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_AES;
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_PMULL") != 0)
cpuid_flags |= CPUID_PMULL;
new_cpuid_flags |= CPUID_PMULL;
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA256") != 0)
cpuid_flags |= CPUID_SHA256;
new_cpuid_flags |= CPUID_SHA256;
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA512") != 0)
cpuid_flags |= CPUID_SHA512;
new_cpuid_flags |= CPUID_SHA512;
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_RDM") != 0)
cpuid_flags |= CPUID_RDM;
new_cpuid_flags |= CPUID_RDM;
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA3") != 0)
cpuid_flags |= CPUID_SHA3;
new_cpuid_flags |= CPUID_SHA3;
#ifdef WOLFSSL_ARMASM_CRYPTO_SM3
cpuid_flags |= CPUID_SM3;
new_cpuid_flags |= CPUID_SM3;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SM4
cpuid_flags |= CPUID_SM4;
new_cpuid_flags |= CPUID_SM4;
#endif
cpuid_check = 1;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#elif defined(__FreeBSD__) || defined(__OpenBSD__)
@@ -259,70 +267,75 @@
#include <sys/auxv.h>
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
word64 features = 0;
elf_aux_info(AT_HWCAP, &features, sizeof(features));
if (features & CPUID_AARCH64_FEAT_AES)
cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_AES;
if (features & CPUID_AARCH64_FEAT_AES_PMULL) {
cpuid_flags |= CPUID_AES;
cpuid_flags |= CPUID_PMULL;
new_cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_PMULL;
}
if (features & CPUID_AARCH64_FEAT_SHA256)
cpuid_flags |= CPUID_SHA256;
new_cpuid_flags |= CPUID_SHA256;
if (features & CPUID_AARCH64_FEAT_SHA256_512)
cpuid_flags |= CPUID_SHA256 | CPUID_SHA512;
new_cpuid_flags |= CPUID_SHA256 | CPUID_SHA512;
if (features & CPUID_AARCH64_FEAT_RDM)
cpuid_flags |= CPUID_RDM;
new_cpuid_flags |= CPUID_RDM;
if (features & CPUID_AARCH64_FEAT_SHA3)
cpuid_flags |= CPUID_SHA3;
new_cpuid_flags |= CPUID_SHA3;
if (features & CPUID_AARCH64_FEAT_SM3)
cpuid_flags |= CPUID_SM3;
new_cpuid_flags |= CPUID_SM3;
if (features & CPUID_AARCH64_FEAT_SM4)
cpuid_flags |= CPUID_SM4;
new_cpuid_flags |= CPUID_SM4;
cpuid_check = 1;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#else
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
cpuid_flags |= CPUID_AES;
cpuid_flags |= CPUID_PMULL;
cpuid_flags |= CPUID_SHA256;
new_cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_PMULL;
new_cpuid_flags |= CPUID_SHA256;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
cpuid_flags |= CPUID_SHA512;
new_cpuid_flags |= CPUID_SHA512;
#endif
#ifndef WOLFSSL_AARCH64_NO_SQRDMLSH
cpuid_flags |= CPUID_RDM;
new_cpuid_flags |= CPUID_RDM;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
cpuid_flags |= CPUID_SHA3;
new_cpuid_flags |= CPUID_SHA3;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SM3
cpuid_flags |= CPUID_SM3;
new_cpuid_flags |= CPUID_SM3;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SM4
cpuid_flags |= CPUID_SM4;
new_cpuid_flags |= CPUID_SM4;
#endif
cpuid_check = 1;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#endif
#elif defined(HAVE_CPUID)
void cpuid_set_flags(void)
static WC_INLINE void cpuid_set_flags(void)
{
if (!cpuid_check) {
cpuid_flags = 0;
cpuid_check = 1;
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
}
#endif
@@ -331,24 +344,29 @@
word32 cpuid_get_flags(void)
{
if (!cpuid_check)
cpuid_set_flags();
return cpuid_flags;
cpuid_set_flags();
return WOLFSSL_ATOMIC_LOAD(cpuid_flags);
}
void cpuid_select_flags(word32 flags)
{
cpuid_flags = flags;
WOLFSSL_ATOMIC_STORE(cpuid_flags, flags);
}
void cpuid_set_flag(word32 flag)
{
cpuid_flags |= flag;
word32 current_flags = WOLFSSL_ATOMIC_LOAD(cpuid_flags);
while (! wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &current_flags, current_flags | flag))
WC_RELAX_LONG_LOOP();
}
void cpuid_clear_flag(word32 flag)
{
cpuid_flags &= ~flag;
word32 current_flags = WOLFSSL_ATOMIC_LOAD(cpuid_flags);
while (! wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &current_flags, current_flags & ~flag))
WC_RELAX_LONG_LOOP();
}
#endif /* HAVE_CPUID */

View File

@@ -169,7 +169,7 @@
#ifdef WOLFSSL_WC_DILITHIUM
#if defined(USE_INTEL_SPEEDUP)
static word32 cpuid_flags = 0;
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
#endif
#ifdef DEBUG_DILITHIUM
@@ -10623,7 +10623,7 @@ int wc_dilithium_init_ex(dilithium_key* key, void* heap, int devId)
}
#if defined(WOLFSSL_WC_DILITHIUM) && defined(USE_INTEL_SPEEDUP)
cpuid_flags = cpuid_get_flags();
cpuid_get_flags_ex(&cpuid_flags);
#endif
return ret;

View File

@@ -83,8 +83,7 @@ and Daniel J. Bernstein
#endif
#ifdef USE_INTEL_POLY1305_SPEEDUP
static word32 intel_flags = 0;
static word32 cpu_flags_set = 0;
static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER;
#endif
#if defined(USE_INTEL_POLY1305_SPEEDUP) || defined(POLY130564)
@@ -513,10 +512,7 @@ int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz)
return BAD_FUNC_ARG;
#ifdef USE_INTEL_POLY1305_SPEEDUP
if (!cpu_flags_set) {
intel_flags = cpuid_get_flags();
cpu_flags_set = 1;
}
cpuid_get_flags_ex(&intel_flags);
SAVE_VECTOR_REGISTERS(return _svr_ret;);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_AVX2(intel_flags))

View File

@@ -274,8 +274,7 @@ static void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data,
#endif
#if defined(__aarch64__) && !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
static word32 cpuid_flags = 0;
static int cpuid_flags_set = 0;
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
#endif
static int InitSha256(wc_Sha256* sha256)
@@ -1763,10 +1762,7 @@ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
#endif
#if defined(__aarch64__) && !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
}
cpuid_get_flags_ex(&cpuid_flags);
#endif
(void)devId;
@@ -2048,10 +2044,7 @@ int wc_Sha256HashBlock(wc_Sha256* sha256, const unsigned char* data,
sha224->heap = heap;
#if defined(__aarch64__) && !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
}
cpuid_get_flags_ex(&cpuid_flags);
#endif
(void)devId;

View File

@@ -57,8 +57,7 @@
#endif
#if defined(__aarch64__) && defined(WOLFSSL_ARMASM_CRYPTO_SHA512)
static word32 cpuid_flags = 0;
static int cpuid_flags_set = 0;
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
#endif
#ifdef WOLFSSL_SHA512
@@ -198,10 +197,7 @@ static int InitSha512_Family(wc_Sha512* sha512, void* heap, int devId,
return ret;
#if defined(__aarch64__) && defined(WOLFSSL_ARMASM_CRYPTO_SHA512)
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
}
cpuid_get_flags_ex(&cpuid_flags);
#endif
(void)devId;
@@ -884,10 +880,7 @@ int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId)
#endif
#if defined(__aarch64__) && defined(WOLFSSL_ARMASM_CRYPTO_SHA512)
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
}
cpuid_get_flags_ex(&cpuid_flags);
#endif
(void)devId;

View File

@@ -184,10 +184,10 @@ This library contains implementation for the random number generator.
#if defined(HAVE_INTEL_RDRAND) || defined(HAVE_INTEL_RDSEED) || \
defined(HAVE_AMD_RDSEED)
static word32 intel_flags = 0;
static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER;
static void wc_InitRng_IntelRD(void)
{
intel_flags = cpuid_get_flags();
cpuid_get_flags_ex(&intel_flags);
}
#if defined(HAVE_INTEL_RDSEED) || defined(HAVE_AMD_RDSEED)
static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz);

View File

@@ -388,7 +388,7 @@ static int InitSha256(wc_Sha256* sha256)
} /* extern "C" */
#endif
static word32 intel_flags = 0;
static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER;
#if defined(WC_C_DYNAMIC_FALLBACK) && !defined(WC_NO_INTERNAL_FUNCTION_POINTERS)
#define WC_NO_INTERNAL_FUNCTION_POINTERS
@@ -425,8 +425,7 @@ static int InitSha256(wc_Sha256* sha256)
}
#endif
if (intel_flags == 0)
intel_flags = cpuid_get_flags();
cpuid_get_flags_ex(&intel_flags);
if (IS_INTEL_SHA(intel_flags)) {
#ifdef HAVE_INTEL_AVX1
@@ -601,7 +600,7 @@ static int InitSha256(wc_Sha256* sha256)
if (transform_check)
return;
intel_flags = cpuid_get_flags();
cpuid_get_flags_ex(&intel_flags);
if (IS_INTEL_SHA(intel_flags)) {
#ifdef HAVE_INTEL_AVX1

View File

@@ -67,8 +67,7 @@
defined(WOLFSSL_ARMASM))
#include <wolfssl/wolfcrypt/cpuid.h>
word32 cpuid_flags;
int cpuid_flags_set = 0;
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
#ifdef WC_C_DYNAMIC_FALLBACK
#define SHA3_BLOCK (sha3->sha3_block)
#define SHA3_BLOCK_N (sha3->sha3_block_n)
@@ -612,17 +611,19 @@ static int InitSha3(wc_Sha3* sha3)
#endif
#ifdef USE_INTEL_SPEEDUP
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
#ifdef WC_C_DYNAMIC_FALLBACK
}
{
int cpuid_flags_were_updated = cpuid_get_flags_ex(&cpuid_flags);
#ifdef WC_C_DYNAMIC_FALLBACK
(void)cpuid_flags_were_updated;
if (! CAN_SAVE_VECTOR_REGISTERS()) {
SHA3_BLOCK = BlockSha3;
SHA3_BLOCK_N = NULL;
}
else
#else
if ((! cpuid_flags_were_updated) && (SHA3_BLOCK != NULL)) {
}
else
#endif
if (IS_INTEL_AVX2(cpuid_flags)) {
SHA3_BLOCK = sha3_block_avx2;
@@ -638,11 +639,13 @@ static int InitSha3(wc_Sha3* sha3)
}
}
#define SHA3_FUNC_PTR
#endif
#endif /* USE_INTEL_SPEEDUP */
#if defined(__aarch64__) && defined(WOLFSSL_ARMASM)
if (!cpuid_flags_set) {
cpuid_flags = cpuid_get_flags();
cpuid_flags_set = 1;
{
int cpuid_flags_were_updated = cpuid_get_flags_ex(&cpuid_flags);
if ((! cpuid_flags_were_updated) && (SHA3_BLOCK != NULL)) {
}
else
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
if (IS_AARCH64_SHA3(cpuid_flags)) {
SHA3_BLOCK = BlockSha3_crypto;

View File

@@ -544,7 +544,7 @@ static int InitSha512_256(wc_Sha512* sha512)
} /* extern "C" */
#endif
static word32 intel_flags = 0;
static cpuid_flags_t intel_flags = WC_CPUID_INITIALIZER;
#if defined(WC_C_DYNAMIC_FALLBACK) && !defined(WC_NO_INTERNAL_FUNCTION_POINTERS)
#define WC_NO_INTERNAL_FUNCTION_POINTERS
@@ -582,8 +582,7 @@ static int InitSha512_256(wc_Sha512* sha512)
}
#endif
if (intel_flags == 0)
intel_flags = cpuid_get_flags();
cpuid_get_flags_ex(&intel_flags);
#if defined(HAVE_INTEL_AVX2)
if (IS_INTEL_AVX2(intel_flags)) {
@@ -724,7 +723,7 @@ static int InitSha512_256(wc_Sha512* sha512)
if (transform_check)
return;
intel_flags = cpuid_get_flags();
cpuid_get_flags_ex(&intel_flags);
#if defined(HAVE_INTEL_AVX2)
if (IS_INTEL_AVX2(intel_flags)) {

View File

@@ -103,7 +103,7 @@ extern volatile sword16 mlkem_opt_blocker;
#if defined(USE_INTEL_SPEEDUP) || (defined(__aarch64__) && \
defined(WOLFSSL_ARMASM))
static word32 cpuid_flags = 0;
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
#endif
/* Half of Q plus one. Converted message bit value of 1. */
@@ -1243,7 +1243,7 @@ void mlkem_init(void)
{
#if defined(USE_INTEL_SPEEDUP) || (defined(__aarch64__) && \
defined(WOLFSSL_ARMASM))
cpuid_flags = cpuid_get_flags();
cpuid_get_flags_ex(&cpuid_flags);
#endif
}

View File

@@ -1282,6 +1282,11 @@ void wolfSSL_Atomic_Int_Init(wolfSSL_Atomic_Int* c, int i)
*c = i;
}
/* Give the unsigned atomic counter *c its starting value i.
 * This branch uses a plain assignment through the pointer. */
void wolfSSL_Atomic_Uint_Init(wolfSSL_Atomic_Uint* c, unsigned int i)
{
*c = i;
}
int wolfSSL_Atomic_Int_FetchAdd(wolfSSL_Atomic_Int* c, int i)
{
return __atomic_fetch_add(c, i, __ATOMIC_RELAXED);
@@ -1291,13 +1296,80 @@ int wolfSSL_Atomic_Int_FetchSub(wolfSSL_Atomic_Int* c, int i)
{
return __atomic_fetch_sub(c, i, __ATOMIC_RELAXED);
}
/* Atomically add i to *c and hand back the value *c holds after the addition.
 * Uses relaxed memory ordering. */
int wolfSSL_Atomic_Int_AddFetch(wolfSSL_Atomic_Int* c, int i)
{
    const int updated = __atomic_add_fetch(c, i, __ATOMIC_RELAXED);

    return updated;
}
/* Atomically subtract i from *c and hand back the value *c holds after the
 * subtraction. Uses relaxed memory ordering. */
int wolfSSL_Atomic_Int_SubFetch(wolfSSL_Atomic_Int* c, int i)
{
    const int updated = __atomic_sub_fetch(c, i, __ATOMIC_RELAXED);

    return updated;
}
/* Strong compare-and-swap on *c.
 *
 * If *c equals *expected_i, new_i is stored and a nonzero value is returned.
 * Otherwise *expected_i receives the value found in *c and 0 is returned.
 *
 * Ordering: a successful swap is sequentially consistent, so every thread
 * observes one total modification order.  A failed swap only needs acquire
 * semantics, enough to reliably read the value that beat us.
 */
int wolfSSL_Atomic_Int_CompareExchange(wolfSSL_Atomic_Int* c, int *expected_i,
                                       int new_i)
{
    int swapped;

    swapped = __atomic_compare_exchange_n(c, expected_i, new_i,
                                          0 /* weak */,
                                          __ATOMIC_SEQ_CST,
                                          __ATOMIC_ACQUIRE);
    return swapped;
}
/* Atomically add i to *c and hand back the value *c held before the addition.
 * Uses relaxed memory ordering. */
unsigned int wolfSSL_Atomic_Uint_FetchAdd(wolfSSL_Atomic_Uint* c,
                                          unsigned int i)
{
    const unsigned int previous = __atomic_fetch_add(c, i, __ATOMIC_RELAXED);

    return previous;
}
/* Atomically subtract i from *c and hand back the value *c held before the
 * subtraction. Uses relaxed memory ordering. */
unsigned int wolfSSL_Atomic_Uint_FetchSub(wolfSSL_Atomic_Uint* c,
                                          unsigned int i)
{
    const unsigned int previous = __atomic_fetch_sub(c, i, __ATOMIC_RELAXED);

    return previous;
}
/* Atomically add i to *c and hand back the value *c holds after the addition.
 * Uses relaxed memory ordering. */
unsigned int wolfSSL_Atomic_Uint_AddFetch(wolfSSL_Atomic_Uint* c,
                                          unsigned int i)
{
    const unsigned int updated = __atomic_add_fetch(c, i, __ATOMIC_RELAXED);

    return updated;
}
/* Atomically subtract i from *c and hand back the value *c holds after the
 * subtraction. Uses relaxed memory ordering. */
unsigned int wolfSSL_Atomic_Uint_SubFetch(wolfSSL_Atomic_Uint* c,
                                          unsigned int i)
{
    const unsigned int updated = __atomic_sub_fetch(c, i, __ATOMIC_RELAXED);

    return updated;
}
/* Strong compare-and-swap on the unsigned counter *c.
 *
 * If *c equals *expected_i, new_i is stored and a nonzero value is returned.
 * Otherwise *expected_i receives the value found in *c and 0 is returned.
 *
 * Ordering: a successful swap is sequentially consistent, so every thread
 * observes one total modification order.  A failed swap only needs acquire
 * semantics, enough to reliably read the value that beat us.
 */
int wolfSSL_Atomic_Uint_CompareExchange(
    wolfSSL_Atomic_Uint* c, unsigned int *expected_i, unsigned int new_i)
{
    int swapped;

    swapped = __atomic_compare_exchange_n(c, expected_i, new_i,
                                          0 /* weak */,
                                          __ATOMIC_SEQ_CST,
                                          __ATOMIC_ACQUIRE);
    return swapped;
}
#else
/* Default C Implementation */
/* Give the atomic counter *c its starting value i via C11 atomic_init(). */
void wolfSSL_Atomic_Int_Init(wolfSSL_Atomic_Int* c, int i)
{
atomic_init(c, i);
}
/* Give the unsigned atomic counter *c its starting value i via C11
 * atomic_init(). */
void wolfSSL_Atomic_Uint_Init(wolfSSL_Atomic_Uint* c, unsigned int i)
{
atomic_init(c, i);
}
int wolfSSL_Atomic_Int_FetchAdd(wolfSSL_Atomic_Int* c, int i)
{
return atomic_fetch_add_explicit(c, i, memory_order_relaxed);
@@ -1307,16 +1379,85 @@ int wolfSSL_Atomic_Int_FetchSub(wolfSSL_Atomic_Int* c, int i)
{
return atomic_fetch_sub_explicit(c, i, memory_order_relaxed);
}
/* Atomically add i to *c and return the value *c holds after the addition.
 * Uses relaxed memory ordering.
 *
 * C11 has no add-then-fetch primitive, so the new value is rebuilt from the
 * fetched old value.  The atomic add itself wraps silently on signed overflow,
 * but a plain `old + i` would be undefined behavior in that case, so the sum
 * is recomputed in unsigned arithmetic to mirror the wraparound.
 */
int wolfSSL_Atomic_Int_AddFetch(wolfSSL_Atomic_Int* c, int i)
{
    const int previous = atomic_fetch_add_explicit(c, i, memory_order_relaxed);

    return (int)((unsigned int)previous + (unsigned int)i);
}
/* Atomically subtract i from *c and return the value *c holds after the
 * subtraction.  Uses relaxed memory ordering.
 *
 * C11 has no sub-then-fetch primitive, so the new value is rebuilt from the
 * fetched old value.  The atomic subtraction wraps silently on signed
 * overflow, but a plain `old - i` would be undefined behavior in that case, so
 * the difference is recomputed in unsigned arithmetic to mirror the
 * wraparound.
 */
int wolfSSL_Atomic_Int_SubFetch(wolfSSL_Atomic_Int* c, int i)
{
    const int previous = atomic_fetch_sub_explicit(c, i, memory_order_relaxed);

    return (int)((unsigned int)previous - (unsigned int)i);
}
/* Strong compare-and-swap on *c.
 *
 * If *c equals *expected_i, new_i is stored and a nonzero value is returned.
 * Otherwise *expected_i receives the value found in *c and 0 is returned.
 *
 * Ordering: a successful swap is sequentially consistent, so every thread
 * observes one total modification order.  A failed swap only needs acquire
 * semantics, enough to reliably read the value that beat us.
 */
int wolfSSL_Atomic_Int_CompareExchange(
    wolfSSL_Atomic_Int* c, int *expected_i, int new_i)
{
    int swapped;

    swapped = atomic_compare_exchange_strong_explicit(c, expected_i, new_i,
                                                      memory_order_seq_cst,
                                                      memory_order_acquire);
    return swapped;
}
/* Atomically add i to *c and hand back the value *c held before the addition.
 * Uses relaxed memory ordering. */
unsigned int wolfSSL_Atomic_Uint_FetchAdd(wolfSSL_Atomic_Uint* c,
                                          unsigned int i)
{
    const unsigned int previous =
        atomic_fetch_add_explicit(c, i, memory_order_relaxed);

    return previous;
}
/* Atomically subtract i from *c and hand back the value *c held before the
 * subtraction. Uses relaxed memory ordering. */
unsigned int wolfSSL_Atomic_Uint_FetchSub(wolfSSL_Atomic_Uint* c,
                                          unsigned int i)
{
    const unsigned int previous =
        atomic_fetch_sub_explicit(c, i, memory_order_relaxed);

    return previous;
}
/* Atomically add i to *c and hand back the value *c holds after the addition.
 * C11 only offers fetch-then-add, so the new value is the fetched old value
 * plus i (unsigned arithmetic, wraps the same way the atomic add does).
 * Uses relaxed memory ordering. */
unsigned int wolfSSL_Atomic_Uint_AddFetch(wolfSSL_Atomic_Uint* c,
                                          unsigned int i)
{
    return atomic_fetch_add_explicit(c, i, memory_order_relaxed) + i;
}
/* Atomically subtract i from *c and hand back the value *c holds after the
 * subtraction.  C11 only offers fetch-then-sub, so the new value is the
 * fetched old value minus i (unsigned arithmetic, wraps the same way the
 * atomic subtraction does).  Uses relaxed memory ordering. */
unsigned int wolfSSL_Atomic_Uint_SubFetch(wolfSSL_Atomic_Uint* c,
                                          unsigned int i)
{
    return atomic_fetch_sub_explicit(c, i, memory_order_relaxed) - i;
}
/* Strong compare-and-swap on the unsigned counter *c.
 *
 * If *c equals *expected_i, new_i is stored and a nonzero value is returned.
 * Otherwise *expected_i receives the value found in *c and 0 is returned.
 *
 * Ordering: a successful swap is sequentially consistent, so every thread
 * observes one total modification order.  A failed swap only needs acquire
 * semantics, enough to reliably read the value that beat us.
 */
int wolfSSL_Atomic_Uint_CompareExchange(
    wolfSSL_Atomic_Uint* c, unsigned int *expected_i, unsigned int new_i)
{
    int swapped;

    swapped = atomic_compare_exchange_strong_explicit(c, expected_i, new_i,
                                                      memory_order_seq_cst,
                                                      memory_order_acquire);
    return swapped;
}
#endif /* __cplusplus */
#elif defined(_MSC_VER)
/* MSVC implementation (Interlocked intrinsics) */
/* Give the atomic counter *c its starting value i.
 * This branch uses a plain assignment through the pointer. */
void wolfSSL_Atomic_Int_Init(wolfSSL_Atomic_Int* c, int i)
{
*c = i;
}
/* Give the unsigned atomic counter *c its starting value i.
 * This branch uses a plain assignment through the pointer. */
void wolfSSL_Atomic_Uint_Init(wolfSSL_Atomic_Uint* c, unsigned int i)
{
*c = i;
}
int wolfSSL_Atomic_Int_FetchAdd(wolfSSL_Atomic_Int* c, int i)
{
return (int)_InterlockedExchangeAdd(c, (long)i);
@@ -1327,6 +1468,76 @@ int wolfSSL_Atomic_Int_FetchSub(wolfSSL_Atomic_Int* c, int i)
return (int)_InterlockedExchangeAdd(c, (long)-i);
}
/* Atomically add i to *c and return the value *c holds after the addition.
 *
 * _InterlockedExchangeAdd() yields the value from before the addition, so the
 * new value is rebuilt from it.  The sum is formed in unsigned arithmetic so
 * that a wraparound of the counter does not turn into signed-overflow
 * undefined behavior in this helper.
 */
int wolfSSL_Atomic_Int_AddFetch(wolfSSL_Atomic_Int* c, int i)
{
    const long previous = _InterlockedExchangeAdd(c, (long)i);

    return (int)((unsigned int)previous + (unsigned int)i);
}
/* Atomically subtract i from *c and return the value *c holds after the
 * subtraction.
 *
 * Subtraction is done by adding the negated operand with
 * _InterlockedExchangeAdd(), which yields the value from before the update.
 * The negation and the reconstruction of the new value are both carried out
 * in unsigned arithmetic: `-i` overflows for i == INT_MIN, and `old - i` can
 * overflow when the counter wraps, either of which would be undefined
 * behavior in signed arithmetic.
 */
int wolfSSL_Atomic_Int_SubFetch(wolfSSL_Atomic_Int* c, int i)
{
    /* Two's-complement negation of i without signed overflow. */
    const unsigned int negated = 0U - (unsigned int)i;
    const long previous = _InterlockedExchangeAdd(c, (long)(int)negated);

    return (int)((unsigned int)previous + negated);
}
/* Compare-and-swap on *c built on InterlockedCompareExchange().
 *
 * If *c equals *expected_i, new_i is stored and 1 is returned.  Otherwise
 * *expected_i is overwritten with the value found in *c and 0 is returned.
 */
int wolfSSL_Atomic_Int_CompareExchange(wolfSSL_Atomic_Int* c, int *expected_i,
                                       int new_i)
{
    const long wanted = (long)*expected_i;
    /* The intrinsic reports whatever *c contained before the attempt. */
    const long observed = InterlockedCompareExchange(c, (long)new_i, wanted);

    if (observed != wanted) {
        *expected_i = (int)observed;
        return 0;
    }
    return 1;
}
/* Atomically add i to *c and hand back the value *c held before the addition.
 * The unsigned counter is passed to the signed intrinsic through a pointer
 * cast. */
unsigned int wolfSSL_Atomic_Uint_FetchAdd(wolfSSL_Atomic_Uint* c,
                                          unsigned int i)
{
    wolfSSL_Atomic_Int *target = (wolfSSL_Atomic_Int *)c;
    const long previous = _InterlockedExchangeAdd(target, (long)i);

    return (unsigned int)previous;
}
/* Atomically subtract i from *c and return the value *c held before the
 * subtraction.
 *
 * Subtraction is done by adding the negated operand through the signed
 * intrinsic.  The operand is negated in unsigned arithmetic first: negating
 * `(long)i` directly is signed-overflow undefined behavior when i is
 * 0x80000000, because that converts to the most negative long.
 */
unsigned int wolfSSL_Atomic_Uint_FetchSub(wolfSSL_Atomic_Uint* c,
                                          unsigned int i)
{
    /* Modular negation; same bit pattern the signed negation would give. */
    const unsigned int negated = 0U - i;

    return (unsigned int)_InterlockedExchangeAdd((wolfSSL_Atomic_Int *)c,
                                                 (long)(int)negated);
}
/* Atomically add i to *c and hand back the value *c holds after the addition.
 * The intrinsic yields the value from before the addition, so i is added to
 * it once more (unsigned, wrapping) to produce the result. */
unsigned int wolfSSL_Atomic_Uint_AddFetch(wolfSSL_Atomic_Uint* c,
                                          unsigned int i)
{
    wolfSSL_Atomic_Int *target = (wolfSSL_Atomic_Int *)c;
    const long previous = _InterlockedExchangeAdd(target, (long)i);

    return (unsigned int)previous + i;
}
/* Atomically subtract i from *c and return the value *c holds after the
 * subtraction.
 *
 * Subtraction is done by adding the negated operand through the signed
 * intrinsic, which yields the value from before the update.  The operand is
 * negated in unsigned arithmetic first: negating `(long)i` directly is
 * signed-overflow undefined behavior when i is 0x80000000, because that
 * converts to the most negative long.
 */
unsigned int wolfSSL_Atomic_Uint_SubFetch(wolfSSL_Atomic_Uint* c,
                                          unsigned int i)
{
    /* Modular negation; same bit pattern the signed negation would give. */
    const unsigned int negated = 0U - i;
    const unsigned int previous = (unsigned int)_InterlockedExchangeAdd
        ((wolfSSL_Atomic_Int *)c, (long)(int)negated);

    return previous - i;
}
/* Compare-and-swap on the unsigned counter *c built on
 * InterlockedCompareExchange(); the counter is passed to the signed intrinsic
 * through a pointer cast.
 *
 * If *c equals *expected_i, new_i is stored and 1 is returned.  Otherwise
 * *expected_i is overwritten with the value found in *c and 0 is returned.
 */
int wolfSSL_Atomic_Uint_CompareExchange(
    wolfSSL_Atomic_Uint* c, unsigned int *expected_i, unsigned int new_i)
{
    const long wanted = (long)*expected_i;
    /* The intrinsic reports whatever *c contained before the attempt. */
    const long observed = InterlockedCompareExchange
        ((wolfSSL_Atomic_Int *)c, (long)new_i, wanted);

    if (observed != wanted) {
        *expected_i = (unsigned int)observed;
        return 0;
    }
    return 1;
}
#endif
#endif /* WOLFSSL_ATOMIC_OPS */
@@ -1395,7 +1606,8 @@ void wolfSSL_RefWithMutexDec(wolfSSL_RefWithMutex* ref, int* isZero, int* err)
#if WOLFSSL_CRYPT_HW_MUTEX
/* Mutex for protection of cryptography hardware */
static wolfSSL_Mutex wcCryptHwMutex WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwMutex);
static wolfSSL_Mutex wcCryptHwMutex
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwMutex);
#ifndef WOLFSSL_MUTEX_INITIALIZER
static int wcCryptHwMutexInit = 0;
#endif
@@ -1437,20 +1649,20 @@ int wolfSSL_CryptHwMutexUnLock(void)
#if WOLFSSL_CRYPT_HW_MUTEX && defined(WOLFSSL_ALGO_HW_MUTEX)
/* Mutex for protection of cryptography hardware */
#ifndef NO_RNG_MUTEX
static wolfSSL_Mutex wcCryptHwRngMutex \
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwRngMutex);
static wolfSSL_Mutex wcCryptHwRngMutex
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwRngMutex);
#endif /* NO_RNG_MUTEX */
#ifndef NO_AES_MUTEX
static wolfSSL_Mutex wcCryptHwAesMutex \
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwAesMutex);
static wolfSSL_Mutex wcCryptHwAesMutex
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwAesMutex);
#endif /* NO_AES_MUTEX */
#ifndef NO_HASH_MUTEX
static wolfSSL_Mutex wcCryptHwHashMutex \
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwHashMutex);
static wolfSSL_Mutex wcCryptHwHashMutex
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwHashMutex);
#endif /* NO_HASH_MUTEX */
#ifndef NO_PK_MUTEX
static wolfSSL_Mutex wcCryptHwPkMutex \
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwPkMutex);
static wolfSSL_Mutex wcCryptHwPkMutex
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(wcCryptHwPkMutex);
#endif /* NO_PK_MUTEX */
#ifndef WOLFSSL_MUTEX_INITIALIZER