Merge pull request #9107 from douzzer/20250816-cpuid_get_flags_ex-optimize

20250816-cpuid_get_flags_ex-optimize
This commit is contained in:
Sean Parkinson
2025-08-18 22:13:44 +10:00
committed by GitHub
2 changed files with 61 additions and 36 deletions

View File

@@ -25,7 +25,7 @@
#if defined(HAVE_CPUID) || defined(HAVE_CPUID_INTEL) || \
defined(HAVE_CPUID_AARCH64)
static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER;
static cpuid_flags_atomic_t cpuid_flags = WC_CPUID_ATOMIC_INITIALIZER;
#endif
#ifdef HAVE_CPUID_INTEL
@@ -49,7 +49,7 @@
#define ECX 2
#define EDX 3
static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit)
static cpuid_flags_t cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit)
{
int got_intel_cpu = 0;
int got_amd_cpu = 0;
@@ -82,8 +82,9 @@
static WC_INLINE void cpuid_set_flags(void)
{
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) {
cpuid_flags_t new_cpuid_flags = 0,
old_cpuid_flags = WC_CPUID_INITIALIZER;
if (cpuid_flag(1, 0, ECX, 28)) { new_cpuid_flags |= CPUID_AVX1 ; }
if (cpuid_flag(7, 0, EBX, 5)) { new_cpuid_flags |= CPUID_AVX2 ; }
if (cpuid_flag(7, 0, EBX, 8)) { new_cpuid_flags |= CPUID_BMI2 ; }
@@ -115,8 +116,9 @@
static WC_INLINE void cpuid_set_flags(void)
{
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) {
cpuid_flags_t new_cpuid_flags = 0,
old_cpuid_flags = WC_CPUID_INITIALIZER;
word64 features;
__asm__ __volatile (
@@ -158,8 +160,9 @@
static WC_INLINE void cpuid_set_flags(void)
{
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) {
cpuid_flags_t new_cpuid_flags = 0,
old_cpuid_flags = WC_CPUID_INITIALIZER;
word64 hwcaps = getauxval(AT_HWCAP);
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
@@ -204,8 +207,9 @@
static WC_INLINE void cpuid_set_flags(void)
{
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) {
cpuid_flags_t new_cpuid_flags = 0,
old_cpuid_flags = WC_CPUID_INITIALIZER;
word64 features = android_getCpuFeatures();
if (features & ANDROID_CPU_ARM_FEATURE_AES)
@@ -237,8 +241,9 @@
static WC_INLINE void cpuid_set_flags(void)
{
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) {
cpuid_flags_t new_cpuid_flags = 0,
old_cpuid_flags = WC_CPUID_INITIALIZER;
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_AES") != 0)
new_cpuid_flags |= CPUID_AES;
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_PMULL") != 0)
@@ -269,8 +274,9 @@
static WC_INLINE void cpuid_set_flags(void)
{
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) {
cpuid_flags_t new_cpuid_flags = 0,
old_cpuid_flags = WC_CPUID_INITIALIZER;
word64 features = 0;
elf_aux_info(AT_HWCAP, &features, sizeof(features));
@@ -301,8 +307,9 @@
#else
static WC_INLINE void cpuid_set_flags(void)
{
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) {
cpuid_flags_t new_cpuid_flags = 0,
old_cpuid_flags = WC_CPUID_INITIALIZER;
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
new_cpuid_flags |= CPUID_AES;
new_cpuid_flags |= CPUID_PMULL;
@@ -332,8 +339,9 @@
#elif defined(HAVE_CPUID)
static WC_INLINE void cpuid_set_flags(void)
{
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) {
word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL;
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) {
cpuid_flags_t new_cpuid_flags = 0,
old_cpuid_flags = WC_CPUID_INITIALIZER;
(void)wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
}
@@ -342,28 +350,28 @@
#ifdef HAVE_CPUID
word32 cpuid_get_flags(void)
cpuid_flags_t cpuid_get_flags(void)
{
cpuid_set_flags();
return WOLFSSL_ATOMIC_LOAD(cpuid_flags);
}
void cpuid_select_flags(word32 flags)
void cpuid_select_flags(cpuid_flags_t flags)
{
WOLFSSL_ATOMIC_STORE(cpuid_flags, flags);
}
void cpuid_set_flag(word32 flag)
void cpuid_set_flag(cpuid_flags_t flag)
{
word32 current_flags = WOLFSSL_ATOMIC_LOAD(cpuid_flags);
cpuid_flags_t current_flags = WOLFSSL_ATOMIC_LOAD(cpuid_flags);
while (! wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &current_flags, current_flags | flag))
WC_RELAX_LONG_LOOP();
}
void cpuid_clear_flag(word32 flag)
void cpuid_clear_flag(cpuid_flags_t flag)
{
word32 current_flags = WOLFSSL_ATOMIC_LOAD(cpuid_flags);
cpuid_flags_t current_flags = WOLFSSL_ATOMIC_LOAD(cpuid_flags);
while (! wolfSSL_Atomic_Uint_CompareExchange
(&cpuid_flags, &current_flags, current_flags & ~flag))
WC_RELAX_LONG_LOOP();

View File

@@ -44,14 +44,15 @@
#define HAVE_CPUID_AARCH64
#endif
#define WC_CPUID_UNINITED_VAL 0xffffffffU
#define WC_CPUID_INITIALIZER 0xffffffffU
typedef word32 cpuid_flags_t;
#if !defined(WOLFSSL_NO_ATOMICS) && !defined(SINGLE_THREADED)
typedef wolfSSL_Atomic_Uint cpuid_flags_t;
#define WC_CPUID_INITIALIZER \
WOLFSSL_ATOMIC_INITIALIZER(WC_CPUID_UNINITED_VAL)
typedef wolfSSL_Atomic_Uint cpuid_flags_atomic_t;
#define WC_CPUID_ATOMIC_INITIALIZER \
WOLFSSL_ATOMIC_INITIALIZER(WC_CPUID_INITIALIZER)
#else
typedef word32 cpuid_flags_t;
#define WC_CPUID_INITIALIZER WC_CPUID_UNINITED_VAL
typedef word32 cpuid_flags_atomic_t;
#define WC_CPUID_ATOMIC_INITIALIZER WC_CPUID_INITIALIZER
#endif
#ifdef HAVE_CPUID_INTEL
@@ -103,11 +104,26 @@
#endif
#ifdef HAVE_CPUID
word32 cpuid_get_flags(void);
cpuid_flags_t cpuid_get_flags(void);
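/* Idempotent flag getter for a caller-owned, non-atomic copy of the flags --
* fast, but the return value (1 if *flags was updated by this call, 0
* otherwise) is not strictly reliable, since *flags is read and written with
* plain (non-atomic) accesses.
*/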
static WC_INLINE int cpuid_get_flags_ex(cpuid_flags_t *flags) {
if (WOLFSSL_ATOMIC_LOAD(*flags) == WC_CPUID_UNINITED_VAL) {
word32 old_cpuid_flags = WC_CPUID_UNINITED_VAL;
if (*flags == WC_CPUID_INITIALIZER) {
*flags = cpuid_get_flags();
return 1;
}
else
return 0;
}
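/* Strictly race-free flag getter -- slow, since it uses an atomic load and a
* compare-exchange on *flags, but the return value strictly reports whether
* this call was the one that updated *flags.
*/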
static WC_INLINE int cpuid_get_flags_atomic(cpuid_flags_atomic_t *flags) {
if (WOLFSSL_ATOMIC_LOAD(*flags) == WC_CPUID_INITIALIZER) {
cpuid_flags_t old_cpuid_flags = WC_CPUID_INITIALIZER;
return wolfSSL_Atomic_Uint_CompareExchange
(flags, &old_cpuid_flags, cpuid_get_flags());
}
@@ -115,10 +131,11 @@
return 0;
}
/* Public APIs to modify flags. */
WOLFSSL_API void cpuid_select_flags(word32 flags);
WOLFSSL_API void cpuid_set_flag(word32 flag);
WOLFSSL_API void cpuid_clear_flag(word32 flag);
WOLFSSL_API void cpuid_select_flags(cpuid_flags_t flags);
WOLFSSL_API void cpuid_set_flag(cpuid_flags_t flag);
WOLFSSL_API void cpuid_clear_flag(cpuid_flags_t flag);
#endif