From 39c6c5af6f320d0a2c78690516e238a0e5d8a424 Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Sat, 16 Aug 2025 13:04:28 -0500 Subject: [PATCH] wolfcrypt/src/cpuid.c, wolfssl/wolfcrypt/cpuid.h: change cpuid_flags_t to a regular word32, and use non-atomics for general flag checking, with a new implementation of cpuid_get_flags_ex() that is threadsafe by idempotency; rename strictly-threadsafe cpuid_get_flags_ex() as cpuid_get_flags_atomic() (strictly accurate return value), and add cpuid_flags_atomic_t and WC_CPUID_ATOMIC_INITIALIZER, used only for internal manipulation of flags in cpuid.c where atomicity matters. --- wolfcrypt/src/cpuid.c | 56 ++++++++++++++++++++++----------------- wolfssl/wolfcrypt/cpuid.h | 41 +++++++++++++++++++--------- 2 files changed, 61 insertions(+), 36 deletions(-) diff --git a/wolfcrypt/src/cpuid.c b/wolfcrypt/src/cpuid.c index 0250911db..a0ac7c66a 100644 --- a/wolfcrypt/src/cpuid.c +++ b/wolfcrypt/src/cpuid.c @@ -25,7 +25,7 @@ #if defined(HAVE_CPUID) || defined(HAVE_CPUID_INTEL) || \ defined(HAVE_CPUID_AARCH64) - static cpuid_flags_t cpuid_flags = WC_CPUID_INITIALIZER; + static cpuid_flags_atomic_t cpuid_flags = WC_CPUID_ATOMIC_INITIALIZER; #endif #ifdef HAVE_CPUID_INTEL @@ -49,7 +49,7 @@ #define ECX 2 #define EDX 3 - static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) + static cpuid_flags_t cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) { int got_intel_cpu = 0; int got_amd_cpu = 0; @@ -82,8 +82,9 @@ static WC_INLINE void cpuid_set_flags(void) { - if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { - word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) { + cpuid_flags_t new_cpuid_flags = 0, + old_cpuid_flags = WC_CPUID_INITIALIZER; if (cpuid_flag(1, 0, ECX, 28)) { new_cpuid_flags |= CPUID_AVX1 ; } if (cpuid_flag(7, 0, EBX, 5)) { new_cpuid_flags |= CPUID_AVX2 ; } if (cpuid_flag(7, 0, EBX, 8)) { 
new_cpuid_flags |= CPUID_BMI2 ; } @@ -115,8 +116,9 @@ static WC_INLINE void cpuid_set_flags(void) { - if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { - word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) { + cpuid_flags_t new_cpuid_flags = 0, + old_cpuid_flags = WC_CPUID_INITIALIZER; word64 features; __asm__ __volatile ( @@ -158,8 +160,9 @@ static WC_INLINE void cpuid_set_flags(void) { - if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { - word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) { + cpuid_flags_t new_cpuid_flags = 0, + old_cpuid_flags = WC_CPUID_INITIALIZER; word64 hwcaps = getauxval(AT_HWCAP); #ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO @@ -204,8 +207,9 @@ static WC_INLINE void cpuid_set_flags(void) { - if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { - word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) { + cpuid_flags_t new_cpuid_flags = 0, + old_cpuid_flags = WC_CPUID_INITIALIZER; word64 features = android_getCpuFeatures(); if (features & ANDROID_CPU_ARM_FEATURE_AES) @@ -237,8 +241,9 @@ static WC_INLINE void cpuid_set_flags(void) { - if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { - word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) { + cpuid_flags_t new_cpuid_flags = 0, + old_cpuid_flags = WC_CPUID_INITIALIZER; if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_AES") != 0) new_cpuid_flags |= CPUID_AES; if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_PMULL") != 0) @@ -269,8 +274,9 @@ static WC_INLINE void cpuid_set_flags(void) { - if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { - word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; + if 
(WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) { + cpuid_flags_t new_cpuid_flags = 0, + old_cpuid_flags = WC_CPUID_INITIALIZER; word64 features = 0; elf_aux_info(AT_HWCAP, &features, sizeof(features)); @@ -301,8 +307,9 @@ #else static WC_INLINE void cpuid_set_flags(void) { - if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { - word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) { + cpuid_flags_t new_cpuid_flags = 0, + old_cpuid_flags = WC_CPUID_INITIALIZER; #ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO new_cpuid_flags |= CPUID_AES; new_cpuid_flags |= CPUID_PMULL; @@ -332,8 +339,9 @@ #elif defined(HAVE_CPUID) static WC_INLINE void cpuid_set_flags(void) { - if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_UNINITED_VAL) { - word32 new_cpuid_flags = 0, old_cpuid_flags = WC_CPUID_UNINITED_VAL; + if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) { + cpuid_flags_t new_cpuid_flags = 0, + old_cpuid_flags = WC_CPUID_INITIALIZER; (void)wolfSSL_Atomic_Uint_CompareExchange (&cpuid_flags, &old_cpuid_flags, new_cpuid_flags); } @@ -342,28 +350,28 @@ #ifdef HAVE_CPUID - word32 cpuid_get_flags(void) + cpuid_flags_t cpuid_get_flags(void) { cpuid_set_flags(); return WOLFSSL_ATOMIC_LOAD(cpuid_flags); } - void cpuid_select_flags(word32 flags) + void cpuid_select_flags(cpuid_flags_t flags) { WOLFSSL_ATOMIC_STORE(cpuid_flags, flags); } - void cpuid_set_flag(word32 flag) + void cpuid_set_flag(cpuid_flags_t flag) { - word32 current_flags = WOLFSSL_ATOMIC_LOAD(cpuid_flags); + cpuid_flags_t current_flags = WOLFSSL_ATOMIC_LOAD(cpuid_flags); while (! wolfSSL_Atomic_Uint_CompareExchange (&cpuid_flags, &current_flags, current_flags | flag)) WC_RELAX_LONG_LOOP(); } - void cpuid_clear_flag(word32 flag) + void cpuid_clear_flag(cpuid_flags_t flag) { - word32 current_flags = WOLFSSL_ATOMIC_LOAD(cpuid_flags); + cpuid_flags_t current_flags = WOLFSSL_ATOMIC_LOAD(cpuid_flags); while (! 
wolfSSL_Atomic_Uint_CompareExchange (&cpuid_flags, &current_flags, current_flags & ~flag)) WC_RELAX_LONG_LOOP(); diff --git a/wolfssl/wolfcrypt/cpuid.h b/wolfssl/wolfcrypt/cpuid.h index 3ba3405b1..5b461ffb4 100644 --- a/wolfssl/wolfcrypt/cpuid.h +++ b/wolfssl/wolfcrypt/cpuid.h @@ -44,14 +44,15 @@ #define HAVE_CPUID_AARCH64 #endif -#define WC_CPUID_UNINITED_VAL 0xffffffffU +#define WC_CPUID_INITIALIZER 0xffffffffU +typedef word32 cpuid_flags_t; #if !defined(WOLFSSL_NO_ATOMICS) && !defined(SINGLE_THREADED) - typedef wolfSSL_Atomic_Uint cpuid_flags_t; - #define WC_CPUID_INITIALIZER \ - WOLFSSL_ATOMIC_INITIALIZER(WC_CPUID_UNINITED_VAL) + typedef wolfSSL_Atomic_Uint cpuid_flags_atomic_t; + #define WC_CPUID_ATOMIC_INITIALIZER \ + WOLFSSL_ATOMIC_INITIALIZER(WC_CPUID_INITIALIZER) #else - typedef word32 cpuid_flags_t; - #define WC_CPUID_INITIALIZER WC_CPUID_UNINITED_VAL + typedef word32 cpuid_flags_atomic_t; + #define WC_CPUID_ATOMIC_INITIALIZER WC_CPUID_INITIALIZER #endif #ifdef HAVE_CPUID_INTEL @@ -103,11 +104,26 @@ #endif #ifdef HAVE_CPUID - word32 cpuid_get_flags(void); + cpuid_flags_t cpuid_get_flags(void); + /* Idempotent flag getter -- fast, but return value (whether updated) is not + * strictly reliable. + */ static WC_INLINE int cpuid_get_flags_ex(cpuid_flags_t *flags) { - if (WOLFSSL_ATOMIC_LOAD(*flags) == WC_CPUID_UNINITED_VAL) { - word32 old_cpuid_flags = WC_CPUID_UNINITED_VAL; + if (*flags == WC_CPUID_INITIALIZER) { + *flags = cpuid_get_flags(); + return 1; + } + else + return 0; + } + + /* Strictly race-free flag getter -- slow, but the return value is strictly + * accurate. + */ static WC_INLINE int cpuid_get_flags_atomic(cpuid_flags_atomic_t *flags) { + if (WOLFSSL_ATOMIC_LOAD(*flags) == WC_CPUID_INITIALIZER) { + cpuid_flags_t old_cpuid_flags = WC_CPUID_INITIALIZER; return wolfSSL_Atomic_Uint_CompareExchange (flags, &old_cpuid_flags, cpuid_get_flags()); } @@ -115,10 +131,11 @@ return 0; } + /* Public APIs to modify flags. 
*/ - WOLFSSL_API void cpuid_select_flags(word32 flags); - WOLFSSL_API void cpuid_set_flag(word32 flag); - WOLFSSL_API void cpuid_clear_flag(word32 flag); + WOLFSSL_API void cpuid_select_flags(cpuid_flags_t flags); + WOLFSSL_API void cpuid_set_flag(cpuid_flags_t flag); + WOLFSSL_API void cpuid_clear_flag(cpuid_flags_t flag); #endif