From 478bfafea38e56a157b2ccc8ffccb399f453038e Mon Sep 17 00:00:00 2001
From: Daniel Pouzzner
Date: Thu, 3 Jul 2025 17:32:35 -0500
Subject: [PATCH 1/3] linuxkm/lkcapi_sha_glue.c:

* add wc_linuxkm_drbg_ctx.n_rngs, and in wc_linuxkm_drbg_init_tfm(), set it
  to max(4, nr_cpu_ids), to avoid stalling on unicore targets;

* add explanatory comments re architecture to get_drbg() and get_drbg_n();

* add opportunistic cond_resched() to get_drbg_n();

* add runtime asserts in get_drbg(), wc_linuxkm_drbg_seed(), and
  get_default_drbg_ctx(), checking that we have the right tfm with an
  allocated DRBG array;

* wc_linuxkm_drbg_startup(): return failure if registering the random_bytes
  handlers fails;

linuxkm/patches/6.1.73/WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-6v1v73.patch:
fix flub.
---
 linuxkm/lkcapi_sha_glue.c                     | 91 +++++++++++++++----
 ...XKM_HAVE_GET_RANDOM_CALLBACKS-6v1v73.patch |  4 +-
 2 files changed, 74 insertions(+), 21 deletions(-)

diff --git a/linuxkm/lkcapi_sha_glue.c b/linuxkm/lkcapi_sha_glue.c
index 9f38a8939..4d955df95 100644
--- a/linuxkm/lkcapi_sha_glue.c
+++ b/linuxkm/lkcapi_sha_glue.c
@@ -940,6 +940,7 @@ struct wc_swallow_the_semicolon
 #include
 
 struct wc_linuxkm_drbg_ctx {
+    size_t n_rngs;
     struct wc_rng_inst {
         wolfSSL_Atomic_Int lock;
         WC_RNG rng;
@@ -951,7 +952,7 @@ static inline void wc_linuxkm_drbg_ctx_clear(struct wc_linuxkm_drbg_ctx * ctx)
     unsigned int i;
 
     if (ctx->rngs) {
-        for (i = 0; i < nr_cpu_ids; ++i) {
+        for (i = 0; i < ctx->n_rngs; ++i) {
             if (ctx->rngs[i].lock != 0) {
                 /* better to leak than to crash. */
                 pr_err("BUG: wc_linuxkm_drbg_ctx_clear called with DRBG #%d still locked.", i);
@@ -961,6 +962,7 @@ static inline void wc_linuxkm_drbg_ctx_clear(struct wc_linuxkm_drbg_ctx * ctx)
         }
         free(ctx->rngs);
         ctx->rngs = NULL;
+        ctx->n_rngs = 0;
     }
 
     return;
@@ -976,12 +978,15 @@ static int wc_linuxkm_drbg_init_tfm(struct crypto_tfm *tfm)
     int need_reenable_vec = 0;
     int can_sleep = (preempt_count() == 0);
 
-    ctx->rngs = (struct wc_rng_inst *)malloc(sizeof(*ctx->rngs) * nr_cpu_ids);
-    if (! ctx->rngs)
+    ctx->n_rngs = max(4, nr_cpu_ids);
+    ctx->rngs = (struct wc_rng_inst *)malloc(sizeof(*ctx->rngs) * ctx->n_rngs);
+    if (! ctx->rngs) {
+        ctx->n_rngs = 0;
         return -ENOMEM;
-    XMEMSET(ctx->rngs, 0, sizeof(*ctx->rngs) * nr_cpu_ids);
+    }
+    XMEMSET(ctx->rngs, 0, sizeof(*ctx->rngs) * ctx->n_rngs);
 
-    for (i = 0; i < nr_cpu_ids; ++i) {
+    for (i = 0; i < ctx->n_rngs; ++i) {
         ctx->rngs[i].lock = 0;
         if (wc_linuxkm_drbg_init_tfm_disable_vector_registers)
             need_reenable_vec = (DISABLE_VECTOR_REGISTERS() == 0);
@@ -1015,10 +1020,29 @@ static void wc_linuxkm_drbg_exit_tfm(struct crypto_tfm *tfm)
 
 static int wc_linuxkm_drbg_default_instance_registered = 0;
 
+/* get_drbg() uses atomic operations to get exclusive ownership of a DRBG
+ * without delay. It expects to be called in uninterruptible context, though it
+ * works fine in any context. It starts by trying the DRBG matching the current
+ * CPU ID, and if that doesn't immediately succeed, it iterates upward until
+ * one succeeds. The first attempt will always succeed, even under intense
+ * load, unless there is or has recently been a reseed or mix-in operation
+ * competing with generators.
+ *
+ * Note that wc_linuxkm_drbg_init_tfm() allocates at least 4 DRBGs, regardless
+ * of nominal core count, to avoid stalling generators on unicore targets.
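+ *
+ * For reference, the acquisition protocol, condensed from the loop below (a
+ * summary sketch, not additional logic): each slot's .lock is claimed with an
+ * atomic compare-exchange from 0, and on failure the search advances to the
+ * next slot, wrapping at n_rngs:
+ *
+ *     for (;;) {
+ *         int expected = 0;
+ *         if (__atomic_compare_exchange_n(&ctx->rngs[n].lock, &expected,
+ *                                         new_lock_value, 0,
+ *                                         __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE))
+ *             return &ctx->rngs[n];
+ *         if (++n >= (int)ctx->n_rngs)
+ *             n = 0;
+ *         cpu_relax();
+ *     }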
+ */
+
 static inline struct wc_rng_inst *get_drbg(struct crypto_rng *tfm) {
     struct wc_linuxkm_drbg_ctx *ctx = (struct wc_linuxkm_drbg_ctx *)crypto_rng_ctx(tfm);
     int n, new_lock_value;
 
+    /* check for mismatched handler or missing instance array. */
+    if ((tfm->base.__crt_alg->cra_init != wc_linuxkm_drbg_init_tfm) ||
+        (ctx->rngs == NULL))
+    {
+        return NULL;
+    }
+
 #if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \
     (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0))
     if (tfm == crypto_default_rng) {
@@ -1041,7 +1065,7 @@ static inline struct wc_rng_inst *get_drbg(struct crypto_rng *tfm) {
         if (likely(__atomic_compare_exchange_n(&ctx->rngs[n].lock, &expected, new_lock_value, 0, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE)))
             return &ctx->rngs[n];
         ++n;
-        if (n >= (int)nr_cpu_ids)
+        if (n >= (int)ctx->n_rngs)
             n = 0;
         cpu_relax();
     }
@@ -1049,12 +1073,20 @@ static inline struct wc_rng_inst *get_drbg(struct crypto_rng *tfm) {
     __builtin_unreachable();
 }
 
+/* get_drbg_n() is used by bulk seed, mix-in, and reseed operations. It expects
+ * the caller to be able to wait until the requested DRBG is available.
+ */
 static inline struct wc_rng_inst *get_drbg_n(struct wc_linuxkm_drbg_ctx *ctx, int n) {
+    int can_sleep = (preempt_count() == 0);
+
     for (;;) {
         int expected = 0;
         if (likely(__atomic_compare_exchange_n(&ctx->rngs[n].lock, &expected, 1, 0, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE)))
             return &ctx->rngs[n];
-        cpu_relax();
+        if (can_sleep)
+            cond_resched();
+        else
+            cpu_relax();
     }
 
     __builtin_unreachable();
@@ -1078,17 +1110,18 @@ static int wc_linuxkm_drbg_generate(struct crypto_rng *tfm,
                                     u8 *dst, unsigned int dlen)
 {
     int ret, retried = 0;
-    /* Note, core is not necessarily locked on entry, so the actual core ID may
-     * change while executing, hence the lock.
-     *
-     * The lock is also needed to coordinate with wc_linuxkm_drbg_seed(), which
-     * seeds all instances.
-     */
+    int need_fpu_restore;
     struct wc_rng_inst *drbg = get_drbg(tfm);
+
+    if (! drbg) {
+        pr_err_once("BUG: get_drbg() failed.");
+        return -EFAULT;
+    }
+
     /* for the default RNG, make sure we don't cache an underlying SHA256
      * method that uses vector insns (forbidden from irq handlers).
      */
-    int need_fpu_restore = (tfm == crypto_default_rng) ? (DISABLE_VECTOR_REGISTERS() == 0) : 0;
+    need_fpu_restore = (tfm == crypto_default_rng) ? (DISABLE_VECTOR_REGISTERS() == 0) : 0;
 
 retry:
 
@@ -1138,6 +1171,13 @@ static int wc_linuxkm_drbg_seed(struct crypto_rng *tfm,
     int ret;
     int n;
 
+    if ((tfm->base.__crt_alg->cra_init != wc_linuxkm_drbg_init_tfm) ||
+        (ctx->rngs == NULL))
+    {
+        pr_err_once("BUG: mismatched tfm.");
+        return -EFAULT;
+    }
+
     if (slen == 0)
         return 0;
 
@@ -1146,7 +1186,10 @@ static int wc_linuxkm_drbg_seed(struct crypto_rng *tfm,
         return -ENOMEM;
     XMEMCPY(seed_copy + 2, seed, slen);
 
-    for (n = nr_cpu_ids - 1; n >= 0; --n) {
+    /* this iteration counts down, whereas the iteration in get_drbg() counts
+     * up, to ensure they can't possibly phase-lock to each other.
+     */
+    for (n = ctx->n_rngs - 1; n >= 0; --n) {
         struct wc_rng_inst *drbg = get_drbg_n(ctx, n);
 
         /* perturb the seed with the CPU ID, so that no DRBG has the exact same
@@ -1249,7 +1292,13 @@ static inline struct crypto_rng *get_crypto_default_rng(void) {
 static inline struct wc_linuxkm_drbg_ctx *get_default_drbg_ctx(void) {
     struct crypto_rng *current_crypto_default_rng = get_crypto_default_rng();
-    return current_crypto_default_rng ? (struct wc_linuxkm_drbg_ctx *)crypto_rng_ctx(current_crypto_default_rng) : NULL;
+    struct wc_linuxkm_drbg_ctx *ctx = (current_crypto_default_rng ?
(struct wc_linuxkm_drbg_ctx *)crypto_rng_ctx(current_crypto_default_rng) : NULL); + if (ctx && (! ctx->rngs)) { + pr_err_once("BUG: get_default_drbg_ctx() found null ctx->rngs."); + return NULL; + } + else + return ctx; } static int wc__get_random_bytes(void *buf, size_t len) @@ -1259,8 +1308,9 @@ static int wc__get_random_bytes(void *buf, size_t len) return -EFAULT; else { int ret = crypto_rng_get_bytes(current_crypto_default_rng, buf, len); - if (ret) + if (ret) { pr_warn("BUG: wc_get_random_bytes falling through to native get_random_bytes with wc_linuxkm_drbg_default_instance_registered, ret=%d.", ret); + } return ret; } __builtin_unreachable(); @@ -1382,7 +1432,7 @@ static int wc_mix_pool_bytes(const void *buf, size_t len) { if (! (ctx = get_default_drbg_ctx())) return -EFAULT; - for (n = nr_cpu_ids - 1; n >= 0; --n) { + for (n = ctx->n_rngs - 1; n >= 0; --n) { struct wc_rng_inst *drbg = get_drbg_n(ctx, n); int V_offset = 0; @@ -1406,7 +1456,7 @@ static int wc_crng_reseed(void) { if (! ctx) return -EFAULT; - for (n = nr_cpu_ids - 1; n >= 0; --n) { + for (n = ctx->n_rngs - 1; n >= 0; --n) { struct wc_rng_inst *drbg = get_drbg_n(ctx, n); ((struct DRBG_internal *)drbg->rng.drbg)->reseedCtr = WC_RESEED_INTERVAL; if (can_sleep) { @@ -1786,6 +1836,7 @@ static int wc_linuxkm_drbg_startup(void) } else { pr_err("ERROR: wolfssl_linuxkm_register_random_bytes_handlers() failed: %d\n", ret); + return ret; } #elif defined(WOLFSSL_LINUXKM_USE_GET_RANDOM_KPROBES) @@ -1797,6 +1848,7 @@ static int wc_linuxkm_drbg_startup(void) } else { pr_err("ERROR: wc_get_random_bytes_kprobe installation failed: %d\n", ret); + return ret; } #ifdef WOLFSSL_LINUXKM_USE_GET_RANDOM_USER_KRETPROBE @@ -1807,6 +1859,7 @@ static int wc_linuxkm_drbg_startup(void) } else { pr_err("ERROR: wc_get_random_bytes_user_kprobe installation failed: %d\n", ret); + return ret; } #endif /* WOLFSSL_LINUXKM_USE_GET_RANDOM_USER_KRETPROBE */ diff --git a/linuxkm/patches/6.1.73/WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-6v1v73.patch b/linuxkm/patches/6.1.73/WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-6v1v73.patch index 7d90e7726..8b6e68e06 100644 --- a/linuxkm/patches/6.1.73/WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-6v1v73.patch +++ b/linuxkm/patches/6.1.73/WOLFSSL_LINUXKM_HAVE_GET_RANDOM_CALLBACKS-6v1v73.patch @@ -1,5 +1,5 @@ --- ./drivers/char/random.c.dist 2024-01-19 16:25:03.754138321 -0600 -+++ ./drivers/char/random.c 2025-07-02 10:45:31.769041473 -0500 ++++ ./drivers/char/random.c 2025-07-03 15:51:24.282595676 -0500 @@ -60,6 +60,260 @@ #include #include @@ -426,7 +426,7 @@ + /* fall through to reseed native crng too. */ + if (call_crng_reseed_cb() == 0) { + if (crng_ready()) -+ crng_reseed(NULL); ++ crng_reseed(); + return 0; + } +#endif From 688bc168de120fbc350931b892f395d1c9021cfc Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Thu, 3 Jul 2025 18:30:46 -0500 Subject: [PATCH 2/3] wolfcrypt/src/random.c: small stack refactor of noise[] in wc_Entropy_Get(). 
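
The pattern, reduced to a standalone sketch for reference (the hypothetical
wc_entropy_scratch_example() is illustrative only; byte, XMALLOC/XFREE,
MEMORY_E, DYNAMIC_TYPE_TMP_BUFFER, and MAX_NOISE_CNT are the types and macros
already used in random.c):

    int wc_entropy_scratch_example(void)
    {
        int ret = 0;
    #ifdef WOLFSSL_SMALL_STACK
        /* small-stack builds move the scratch buffer to the heap. */
        byte *noise = (byte *)XMALLOC(MAX_NOISE_CNT, NULL,
                                      DYNAMIC_TYPE_TMP_BUFFER);
        if (noise == NULL)
            return MEMORY_E;
    #else
        byte noise[MAX_NOISE_CNT];
    #endif

        /* ... fill and consume noise[] ... */

    #ifdef WOLFSSL_SMALL_STACK
        XFREE(noise, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    #endif
        return ret;
    }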
--- wolfcrypt/src/random.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c index 6e44ff6e8..dc85e4b7b 100644 --- a/wolfcrypt/src/random.c +++ b/wolfcrypt/src/random.c @@ -1494,13 +1494,23 @@ static wolfSSL_Mutex entropy_mutex WOLFSSL_MUTEX_INITIALIZER_CLAUSE(entropy_mute int wc_Entropy_Get(int bits, unsigned char* entropy, word32 len) { int ret = 0; +#ifdef WOLFSSL_SMALL_STACK + byte *noise = NULL; +#else byte noise[MAX_NOISE_CNT]; +#endif /* Noise length is the number of 8 byte samples required to get the bits of * entropy requested. */ int noise_len = (bits + ENTROPY_EXTRA) / ENTROPY_MIN; +#ifdef WOLFSSL_SMALL_STACK + noise = (byte *)XMALLOC(MAX_NOISE_CNT, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (noise == NULL) + return MEMORY_E; +#endif + /* Lock the mutex as collection uses globals. */ - if (wc_LockMutex(&entropy_mutex) != 0) { + if ((ret == 0) && (wc_LockMutex(&entropy_mutex) != 0)) { ret = BAD_MUTEX_E; } @@ -1558,6 +1568,10 @@ int wc_Entropy_Get(int bits, unsigned char* entropy, word32 len) wc_UnLockMutex(&entropy_mutex); } +#ifdef WOLFSSL_SMALL_STACK + XFREE(noise, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; } From ef3a1a28d9e7fa6ce5c0d8c377bc3ab718d29f5d Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Thu, 3 Jul 2025 22:09:34 -0500 Subject: [PATCH 3/3] linuxkm/linuxkm_wc_port.h, linuxkm/module_hooks.c, and wolfcrypt/src/wc_port.c: fixes for spinlocks on CONFIG_ARM64; wolfcrypt/src/wc_port.c: include random.h, for Entropy_Init(). --- linuxkm/linuxkm_wc_port.h | 41 +++++++++++++++++++++++++++++---------- linuxkm/module_hooks.c | 7 +++++++ wolfcrypt/src/wc_port.c | 9 +++++++++ 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/linuxkm/linuxkm_wc_port.h b/linuxkm/linuxkm_wc_port.h index f77a55481..fcd3453df 100644 --- a/linuxkm/linuxkm_wc_port.h +++ b/linuxkm/linuxkm_wc_port.h @@ -126,6 +126,7 @@ #if defined(__PIE__) && defined(CONFIG_ARM64) #define alt_cb_patch_nops my__alt_cb_patch_nops + #define queued_spin_lock_slowpath my__queued_spin_lock_slowpath #endif #include @@ -705,20 +706,30 @@ #ifdef CONFIG_ARM64 #ifdef __PIE__ - /* alt_cb_patch_nops defined early to allow shimming in system - * headers, but now we need the native one. + /* alt_cb_patch_nops and queued_spin_lock_slowpath are defined early + * to allow shimming in system headers, but now we need the native + * ones. */ #undef alt_cb_patch_nops typeof(my__alt_cb_patch_nops) *alt_cb_patch_nops; + #undef queued_spin_lock_slowpath + typeof(my__queued_spin_lock_slowpath) *queued_spin_lock_slowpath; #else typeof(alt_cb_patch_nops) *alt_cb_patch_nops; + typeof(queued_spin_lock_slowpath) *queued_spin_lock_slowpath; #endif #endif typeof(preempt_count) *preempt_count; - typeof(_raw_spin_lock_irqsave) *_raw_spin_lock_irqsave; - typeof(_raw_spin_trylock) *_raw_spin_trylock; - typeof(_raw_spin_unlock_irqrestore) *_raw_spin_unlock_irqrestore; + #ifndef _raw_spin_lock_irqsave + typeof(_raw_spin_lock_irqsave) *_raw_spin_lock_irqsave; + #endif + #ifndef _raw_spin_trylock + typeof(_raw_spin_trylock) *_raw_spin_trylock; + #endif + #ifndef _raw_spin_unlock_irqrestore + typeof(_raw_spin_unlock_irqrestore) *_raw_spin_unlock_irqrestore; + #endif typeof(_cond_resched) *_cond_resched; const void *_last_slot; @@ -885,9 +896,19 @@ #undef preempt_count /* just in case -- not a macro on x86. 
*/
     #define preempt_count (wolfssl_linuxkm_get_pie_redirect_table()->preempt_count)
-    #define _raw_spin_lock_irqsave (wolfssl_linuxkm_get_pie_redirect_table()->_raw_spin_lock_irqsave)
-    #define _raw_spin_trylock (wolfssl_linuxkm_get_pie_redirect_table()->_raw_spin_trylock)
-    #define _raw_spin_unlock_irqrestore (wolfssl_linuxkm_get_pie_redirect_table()->_raw_spin_unlock_irqrestore)
+
+    #ifndef WOLFSSL_LINUXKM_USE_MUTEXES
+    #ifndef _raw_spin_lock_irqsave
+        #define _raw_spin_lock_irqsave (wolfssl_linuxkm_get_pie_redirect_table()->_raw_spin_lock_irqsave)
+    #endif
+    #ifndef _raw_spin_trylock
+        #define _raw_spin_trylock (wolfssl_linuxkm_get_pie_redirect_table()->_raw_spin_trylock)
+    #endif
+    #ifndef _raw_spin_unlock_irqrestore
+        #define _raw_spin_unlock_irqrestore (wolfssl_linuxkm_get_pie_redirect_table()->_raw_spin_unlock_irqrestore)
+    #endif
+    #endif
+
     #define _cond_resched (wolfssl_linuxkm_get_pie_redirect_table()->_cond_resched)
 
     /* this is defined in linux/spinlock.h as an inline that calls the unshimmed
@@ -991,8 +1012,8 @@
 
     static inline int wc_LockMutex(wolfSSL_Mutex* m)
     {
-        if (in_nmi() || in_hardirq() || in_softirq())
-            return BAD_STATE_E;
+        if (in_nmi() || hardirq_count() || in_softirq())
+            return -1;
         mutex_lock(m);
         return 0;
     }
diff --git a/linuxkm/module_hooks.c b/linuxkm/module_hooks.c
index 0fdb432a6..bece5327d 100644
--- a/linuxkm/module_hooks.c
+++ b/linuxkm/module_hooks.c
@@ -658,13 +658,20 @@ static int set_up_wolfssl_linuxkm_pie_redirect_table(void) {
 #endif
 
     wolfssl_linuxkm_pie_redirect_table.preempt_count = my_preempt_count;
+#ifndef _raw_spin_lock_irqsave
     wolfssl_linuxkm_pie_redirect_table._raw_spin_lock_irqsave = _raw_spin_lock_irqsave;
+#endif
+#ifndef _raw_spin_trylock
     wolfssl_linuxkm_pie_redirect_table._raw_spin_trylock = _raw_spin_trylock;
+#endif
+#ifndef _raw_spin_unlock_irqrestore
     wolfssl_linuxkm_pie_redirect_table._raw_spin_unlock_irqrestore = _raw_spin_unlock_irqrestore;
+#endif
     wolfssl_linuxkm_pie_redirect_table._cond_resched = _cond_resched;
 
 #ifdef CONFIG_ARM64
     wolfssl_linuxkm_pie_redirect_table.alt_cb_patch_nops = alt_cb_patch_nops;
+    wolfssl_linuxkm_pie_redirect_table.queued_spin_lock_slowpath = queued_spin_lock_slowpath;
 #endif
 
     /* runtime assert that the table has no null slots after initialization. */
diff --git a/wolfcrypt/src/wc_port.c b/wolfcrypt/src/wc_port.c
index 17dc556e4..bf69834ce 100644
--- a/wolfcrypt/src/wc_port.c
+++ b/wolfcrypt/src/wc_port.c
@@ -26,6 +26,9 @@
 #endif
 
 #include
+#ifdef HAVE_ENTROPY_MEMUSE
+    #include <wolfssl/wolfcrypt/random.h>
+#endif
 #ifdef HAVE_ECC
     #include
 #endif
@@ -4628,4 +4631,10 @@ noinstr void my__alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,
     return (wolfssl_linuxkm_get_pie_redirect_table()->
             alt_cb_patch_nops)(alt, origptr, updptr, nr_inst);
 }
+
+void my__queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+    return (wolfssl_linuxkm_get_pie_redirect_table()->
+            queued_spin_lock_slowpath)(lock, val);
+}
 #endif
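
Note on the shim pattern that patch 3 extends to queued_spin_lock_slowpath (a
sketch condensed from the diffs above, not additional code; the redirect-table
plumbing is as declared in linuxkm_wc_port.h):

    /* In PIE-built units, rename the kernel symbol before system headers are
     * parsed, so inlined lock code resolves to the shim: */
    #define queued_spin_lock_slowpath my__queued_spin_lock_slowpath

    /* In the non-PIE module init, record the native symbol in the table: */
    wolfssl_linuxkm_pie_redirect_table.queued_spin_lock_slowpath =
        queued_spin_lock_slowpath;

    /* The shim, built into the PIE object, forwards through the table: */
    void my__queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
    {
        (wolfssl_linuxkm_get_pie_redirect_table()->
            queued_spin_lock_slowpath)(lock, val);
    }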