diff --git a/Makefile.am b/Makefile.am index e6d7fcb91..f3decb03c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -218,7 +218,7 @@ if BUILD_LINUXKM CFLAGS_FPU_DISABLE CFLAGS_FPU_ENABLE CFLAGS_SIMD_DISABLE CFLAGS_SIMD_ENABLE \ CFLAGS_AUTO_VECTORIZE_DISABLE CFLAGS_AUTO_VECTORIZE_ENABLE \ ASFLAGS_FPU_DISABLE_SIMD_ENABLE ASFLAGS_FPU_ENABLE_SIMD_DISABLE \ - ASFLAGS_FPUSIMD_DISABLE ASFLAGS_FPUSIMD_ENABLE + ASFLAGS_FPUSIMD_DISABLE ASFLAGS_FPUSIMD_ENABLE ENABLED_LINUXKM_BENCHMARKS module: +$(MAKE) -C linuxkm libwolfssl.ko diff --git a/configure.ac b/configure.ac index 01417eb04..08e32a07a 100644 --- a/configure.ac +++ b/configure.ac @@ -495,6 +495,16 @@ then fi AC_SUBST([ENABLED_LINUXKM_PIE]) +AC_ARG_ENABLE([linuxkm-benchmarks], + [AS_HELP_STRING([--enable-linuxkm-benchmarks],[Enable crypto benchmarking autorun at module load time for Linux kernel module (default: disabled)])], + [ENABLED_LINUXKM_BENCHMARKS=$enableval], + [ENABLED_LINUXKM_BENCHMARKS=no] + ) +if test "$ENABLED_LINUXKM_BENCHMARKS" = "yes" +then + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_LINUXKM_BENCHMARKS" +fi +AC_SUBST([ENABLED_LINUXKM_BENCHMARKS]) if test "$ENABLED_LINUXKM_DEFAULTS" = "yes" then @@ -7369,6 +7379,11 @@ fi if test "$ENABLED_ASYNCCRYPT" = "yes" then + if ! test -f ${srcdir}/wolfcrypt/src/async.c || ! test -f ${srcdir}/wolfssl/wolfcrypt/async.h + then + AC_MSG_ERROR([--enable-asynccrypt requested, but WOLFSSL_ASYNC_CRYPT source files are missing.]) + fi + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_ASYNC_CRYPT -DHAVE_WOLF_EVENT -DHAVE_WOLF_BIGINT -DWOLFSSL_NO_HASH_RAW" # If no async backend (hardware or software) has been explicitly enabled, @@ -8810,9 +8825,9 @@ then for header in $openssl_headers do AC_CHECK_HEADER([$header], [], [ - AC_MSG_ERROR([Error including $header. 
Possible circular dependency introduced or missing include.]) + AC_MSG_ERROR([Header file inconsistency detected -- error including ${header}.]) ], [ - #include + #include <${OPTION_FILE}> extern int dummy_int_to_make_compiler_happy; ]) done @@ -8852,6 +8867,7 @@ echo " * FPU enable as flags: $ASFLAGS_FPU_ENABLE_SIMD_DISABLE" && \ echo " * SIMD+FPU disable as flags: $ASFLAGS_FPUSIMD_DISABLE" && \ echo " * SIMD+FPU enable as flags: $ASFLAGS_FPUSIMD_ENABLE" && \ echo " * Linux kernel module PIE: $ENABLED_LINUXKM_PIE" +echo " * Linux kernel module bench: $ENABLED_LINUXKM_BENCHMARKS" echo " * Debug enabled: $ax_enable_debug" echo " * Coverage enabled: $ax_enable_coverage" diff --git a/linuxkm/Kbuild b/linuxkm/Kbuild index 57ad487cf..3133ea8fc 100644 --- a/linuxkm/Kbuild +++ b/linuxkm/Kbuild @@ -90,6 +90,11 @@ ifeq "$(ENABLED_LINUXKM_PIE)" "yes" $(obj)/linuxkm/module_hooks.o: ccflags-y += $(PIE_SUPPORT_FLAGS) endif +ifeq "$(ENABLED_LINUXKM_BENCHMARKS)" "yes" + $(obj)/linuxkm/module_hooks.o: ccflags-y = $(WOLFSSL_CFLAGS) $(CFLAGS_FPU_ENABLE) $(CFLAGS_SIMD_ENABLE) $(PIE_SUPPORT_FLAGS) + $(obj)/linuxkm/module_hooks.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_ENABLE_SIMD_DISABLE) +endif + asflags-y := $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPUSIMD_DISABLE) # vectorized implementations that are kernel-safe are listed here. 
diff --git a/linuxkm/linuxkm_memory.c b/linuxkm/linuxkm_memory.c index ee30af987..31a7b9355 100644 --- a/linuxkm/linuxkm_memory.c +++ b/linuxkm/linuxkm_memory.c @@ -21,320 +21,262 @@ /* included by wolfcrypt/src/memory.c */ -#if defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86) - #ifdef LINUXKM_SIMD_IRQ - #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) - static union fpregs_state **wolfcrypt_linuxkm_fpu_states = NULL; - #else - static struct fpstate **wolfcrypt_linuxkm_fpu_states = NULL; - #endif - #else - static unsigned int *wolfcrypt_linuxkm_fpu_states = NULL; - #define WC_FPU_COUNT_MASK 0x7fffffffU - #define WC_FPU_SAVED_MASK 0x80000000U - #endif +#ifdef HAVE_KVMALLOC +/* adapted from kvrealloc() draft by Changli Gao, 2010-05-13 */ +void *lkm_realloc(void *ptr, size_t newsize) { + void *nptr; + size_t oldsize; - static WARN_UNUSED_RESULT inline int am_in_hard_interrupt_handler(void) - { - return (preempt_count() & (NMI_MASK | HARDIRQ_MASK)) != 0; + if (unlikely(newsize == 0)) { + kvfree(ptr); + return ZERO_SIZE_PTR; } - WARN_UNUSED_RESULT int allocate_wolfcrypt_linuxkm_fpu_states(void) - { - #ifdef LINUXKM_SIMD_IRQ - #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) - wolfcrypt_linuxkm_fpu_states = - (union fpregs_state **)kzalloc(nr_cpu_ids - * sizeof(struct fpu_state *), - GFP_KERNEL); - #else - wolfcrypt_linuxkm_fpu_states = - (struct fpstate **)kzalloc(nr_cpu_ids - * sizeof(struct fpstate *), - GFP_KERNEL); - #endif - #else - wolfcrypt_linuxkm_fpu_states = - (unsigned int *)kzalloc(nr_cpu_ids * sizeof(unsigned int), - GFP_KERNEL); - #endif + if (unlikely(ptr == NULL)) + return kvmalloc_node(newsize, GFP_KERNEL, NUMA_NO_NODE); - if (! 
wolfcrypt_linuxkm_fpu_states) { - pr_err("warning, allocation of %lu bytes for " - "wolfcrypt_linuxkm_fpu_states failed.\n", - nr_cpu_ids * sizeof(struct fpu_state *)); - return MEMORY_E; - } -#ifdef LINUXKM_SIMD_IRQ - { - typeof(nr_cpu_ids) i; - for (i=0; istate, which - * has stringent alignment requirements (64 byte cache - * line), but takes a pointer to the parent struct. work - * around this. - */ - #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 14, 0) - struct fpu *fake_fpu_pointer = - (struct fpu *)(((char *)wolfcrypt_linuxkm_fpu_states[processor_id]) - - offsetof(struct fpu, state)); - copy_fpregs_to_fpstate(fake_fpu_pointer); - #elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) - struct fpu *fake_fpu_pointer = - (struct fpu *)(((char *)wolfcrypt_linuxkm_fpu_states[processor_id]) - - offsetof(struct fpu, state)); - save_fpregs_to_fpstate(fake_fpu_pointer); - #else - struct fpu *fake_fpu_pointer = - (struct fpu *)(((char *)wolfcrypt_linuxkm_fpu_states[processor_id]) - - offsetof(struct fpu, fpstate)); - save_fpregs_to_fpstate(fake_fpu_pointer); - #endif - } - /* mark the slot as used. */ - ((unsigned char *)wolfcrypt_linuxkm_fpu_states[processor_id])[PAGE_SIZE-1] = 1; - /* note, not preempt_enable()ing, mirroring kernel_fpu_begin() - * semantics, even though routine will have been entered already - * non-preemptable. - */ - return 0; - } else -#endif /* LINUXKM_SIMD_IRQ */ - { - preempt_enable(); - return BAD_STATE_E; - } + page = virt_to_head_page(ptr); + if (PageSlab(page) || PageCompound(page)) { + if (newsize < PAGE_SIZE) +#endif /* ! __PIE__ */ + return krealloc(ptr, newsize, GFP_KERNEL); +#ifndef __PIE__ + oldsize = ksize(ptr); } else { + oldsize = page->private; + if (newsize <= oldsize) + return ptr; + } +#endif /* ! 
__PIE__ */ + } - /* allow for nested calls */ -#ifdef LINUXKM_SIMD_IRQ - if (((unsigned char *)wolfcrypt_linuxkm_fpu_states[processor_id])[PAGE_SIZE-1] != 0) { - if (((unsigned char *)wolfcrypt_linuxkm_fpu_states[processor_id])[PAGE_SIZE-1] == 255) { - preempt_enable(); - pr_err("save_vector_registers_x86 recursion register overflow for " - "cpu id %d.\n", processor_id); - return BAD_STATE_E; - } else { - ++((unsigned char *)wolfcrypt_linuxkm_fpu_states[processor_id])[PAGE_SIZE-1]; - return 0; - } - } - kernel_fpu_begin(); - preempt_enable(); /* kernel_fpu_begin() does its own - * preempt_disable(). decrement ours. - */ - ((unsigned char *)wolfcrypt_linuxkm_fpu_states[processor_id])[PAGE_SIZE-1] = 1; -#else /* !LINUXKM_SIMD_IRQ */ - if (wolfcrypt_linuxkm_fpu_states[processor_id] != 0U) { - if ((wolfcrypt_linuxkm_fpu_states[processor_id] & WC_FPU_COUNT_MASK) - == WC_FPU_COUNT_MASK) - { - preempt_enable(); - pr_err("save_vector_registers_x86 recursion register overflow for " - "cpu id %d.\n", processor_id); - return BAD_STATE_E; - } else { - ++wolfcrypt_linuxkm_fpu_states[processor_id]; - return 0; - } - } + nptr = kvmalloc_node(newsize, GFP_KERNEL, NUMA_NO_NODE); + if (nptr != NULL) { + memcpy(nptr, ptr, oldsize); + kvfree(ptr); + } - /* if kernel_fpu_begin() won't actually save the reg file (because - * it was already saved and invalidated, or because we're in a - * kernel thread), don't call kernel_fpu_begin() here, nor call - * kernel_fpu_end() in cleanup. this avoids pointless overhead. in - * kernels >=5.17.12 (from changes to irq_fpu_usable() in linux - * commit 59f5ede3bc0f, backported somewhere >5.17.5), this also - * fixes register corruption. - */ - if ((current->flags & PF_KTHREAD) || - test_thread_flag(TIF_NEED_FPU_LOAD)) - { - wolfcrypt_linuxkm_fpu_states[processor_id] = - WC_FPU_SAVED_MASK + 1U; /* set msb 1 to inhibit kernel_fpu_end() at cleanup. */ - /* keep preempt_disable()d from above. 
*/ - } else { - kernel_fpu_begin(); - preempt_enable(); /* kernel_fpu_begin() does its own - * preempt_disable(). decrement ours. - */ - wolfcrypt_linuxkm_fpu_states[processor_id] = 1U; /* set msb 0 to trigger kernel_fpu_end() at cleanup. */ - } -#endif /* !LINUXKM_SIMD_IRQ */ + return nptr; +} +#endif /* HAVE_KVMALLOC */ +#if defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86) + +static unsigned int wc_linuxkm_fpu_states_n_tracked = 0; + +struct wc_thread_fpu_count_ent { + volatile pid_t pid; + unsigned int fpu_state; +}; +struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_states = NULL; +#define WC_FPU_COUNT_MASK 0x7fffffffU +#define WC_FPU_SAVED_MASK 0x80000000U + +WARN_UNUSED_RESULT int allocate_wolfcrypt_linuxkm_fpu_states(void) +{ + if (wc_linuxkm_fpu_states != NULL) { + static int warned_for_repeat_alloc = 0; + if (! warned_for_repeat_alloc) { + pr_err("attempt at repeat allocation" + " in allocate_wolfcrypt_linuxkm_fpu_states\n"); + warned_for_repeat_alloc = 1; + } + return BAD_STATE_E; + } + + if (nr_cpu_ids >= 16) + wc_linuxkm_fpu_states_n_tracked = nr_cpu_ids * 2; + else + wc_linuxkm_fpu_states_n_tracked = 32; + + wc_linuxkm_fpu_states = + (struct wc_thread_fpu_count_ent *)malloc( + wc_linuxkm_fpu_states_n_tracked * sizeof(wc_linuxkm_fpu_states[0])); + + if (! 
wc_linuxkm_fpu_states) { + /* report the byte count actually passed to malloc(), not the old per-cpu size. */ + pr_err("allocation of %zu bytes for " + "wc_linuxkm_fpu_states failed.\n", + wc_linuxkm_fpu_states_n_tracked * sizeof(wc_linuxkm_fpu_states[0])); + return MEMORY_E; + } + + memset(wc_linuxkm_fpu_states, 0, wc_linuxkm_fpu_states_n_tracked * sizeof(wc_linuxkm_fpu_states[0])); + + return 0; +} + +void free_wolfcrypt_linuxkm_fpu_states(void) { + struct wc_thread_fpu_count_ent *i, *i_endptr; + pid_t i_pid; + + if (wc_linuxkm_fpu_states == NULL) { + pr_err("free_wolfcrypt_linuxkm_fpu_states called" + " before allocate_wolfcrypt_linuxkm_fpu_states.\n"); + return; + } + + for (i = wc_linuxkm_fpu_states, + i_endptr = &wc_linuxkm_fpu_states[wc_linuxkm_fpu_states_n_tracked]; + i < i_endptr; + ++i) + { + i_pid = __atomic_load_n(&i->pid, __ATOMIC_CONSUME); + if (i_pid == 0) + continue; + if (i->fpu_state != 0) { + pr_err("free_wolfcrypt_linuxkm_fpu_states called" + " with nonzero state 0x%x for pid %d.\n", i->fpu_state, i_pid); + i->fpu_state = 0; + } + } + + free(wc_linuxkm_fpu_states); + wc_linuxkm_fpu_states = NULL; +} + +/* lock-(mostly)-free thread-local storage facility for tracking recursive fpu pushing/popping */ +static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc(int create_p) { + struct wc_thread_fpu_count_ent *i, *i_endptr, *i_empty; + pid_t my_pid = task_pid_nr(current), i_pid; + + { + static int _warned_on_null = 0; + if (wc_linuxkm_fpu_states == NULL) + { + if (_warned_on_null == 0) { + pr_err("wc_linuxkm_fpu_state_assoc called by pid %d" + " before allocate_wolfcrypt_linuxkm_fpu_states.\n", my_pid); + _warned_on_null = 1; + } + return NULL; + } + } + + i_endptr = &wc_linuxkm_fpu_states[wc_linuxkm_fpu_states_n_tracked]; + + for (;;) { + for (i = wc_linuxkm_fpu_states, + i_empty = NULL; + i < i_endptr; + ++i) + { + i_pid = __atomic_load_n(&i->pid, __ATOMIC_CONSUME); + if (i_pid == my_pid) + return i; + if ((i_empty == NULL) && (i_pid == 0)) + i_empty = i; + } + if ((i_empty == NULL) || (! 
create_p)) + return NULL; + + i_pid = 0; + if (__atomic_compare_exchange_n( + &(i_empty->pid), + &i_pid, + my_pid, + 0 /* weak */, + __ATOMIC_SEQ_CST /* success_memmodel */, + __ATOMIC_SEQ_CST /* failure_memmodel */)) + { + return i_empty; + } + } +} + +static void wc_linuxkm_fpu_state_free(struct wc_thread_fpu_count_ent *ent) { + if (ent->fpu_state != 0) { + static int warned_nonzero_fpu_state = 0; + if (! warned_nonzero_fpu_state) { + pr_err("wc_linuxkm_fpu_state_free for pid %d" + " with nonzero fpu_state 0x%x.\n", ent->pid, ent->fpu_state); + warned_nonzero_fpu_state = 1; + } + ent->fpu_state = 0; + } + __atomic_store_n(&ent->pid, 0, __ATOMIC_RELEASE); +} + +WARN_UNUSED_RESULT int save_vector_registers_x86(void) +{ + struct wc_thread_fpu_count_ent *pstate = wc_linuxkm_fpu_state_assoc(1); + if (pstate == NULL) + return ENOMEM; + + /* allow for nested calls */ + if (pstate->fpu_state != 0U) { + if ((pstate->fpu_state & WC_FPU_COUNT_MASK) + == WC_FPU_COUNT_MASK) + { + pr_err("save_vector_registers_x86 recursion register overflow for " + "pid %d.\n", pstate->pid); + return BAD_STATE_E; + } else { + ++pstate->fpu_state; return 0; } } - void restore_vector_registers_x86(void) - { - int processor_id = smp_processor_id(); - if ((wolfcrypt_linuxkm_fpu_states == NULL) -#ifdef LINUXKM_SIMD_IRQ - || (wolfcrypt_linuxkm_fpu_states[processor_id] == NULL) + if (irq_fpu_usable()) { +#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) + /* inhibit migration, which gums up the algorithm in kernel_fpu_{begin,end}(). */ + migrate_disable(); #endif - ) - { - pr_err("restore_vector_registers_x86 called for cpu id %d " - "with null context buffer.\n", processor_id); - return; - } + kernel_fpu_begin(); + pstate->fpu_state = 1U; /* set msb 0 to trigger kernel_fpu_end() at cleanup. */ + } else if (in_nmi() || (hardirq_count() > 0) || (softirq_count() > 0)) { + static int warned_fpu_forbidden = 0; + if (! 
warned_fpu_forbidden) { + pr_err("save_vector_registers_x86 called from IRQ handler.\n"); + /* latch the flag so the message is logged once rather than on every call. */ + warned_fpu_forbidden = 1; + } + wc_linuxkm_fpu_state_free(pstate); + return EPERM; + } else { +#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) + migrate_disable(); +#endif + /* assume already safely in_kernel_fpu. */ + pstate->fpu_state = + WC_FPU_SAVED_MASK + 1U; /* set msb 1 to inhibit kernel_fpu_end() at cleanup. */ + } -#ifdef LINUXKM_SIMD_IRQ - if (((unsigned char *)wolfcrypt_linuxkm_fpu_states[processor_id])[PAGE_SIZE-1] == 0) - { - pr_err("restore_vector_registers_x86 called for cpu id %d " - "without saved context.\n", processor_id); - return; - } - - if (--((unsigned char *)wolfcrypt_linuxkm_fpu_states[processor_id])[PAGE_SIZE-1] > 0) { - preempt_enable(); /* preempt_disable count will still be nonzero after this decrement. */ - return; - } - - if (am_in_hard_interrupt_handler()) { - #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 14, 0) - copy_kernel_to_fpregs(wolfcrypt_linuxkm_fpu_states[processor_id]); - #elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) - __restore_fpregs_from_fpstate(wolfcrypt_linuxkm_fpu_states[processor_id], - xfeatures_mask_all); - #else - restore_fpregs_from_fpstate(wolfcrypt_linuxkm_fpu_states[processor_id], - fpu_kernel_cfg.max_features); - #endif - preempt_enable(); - } else { - kernel_fpu_end(); - } -#else /* !LINUXKM_SIMD_IRQ */ - if ((wolfcrypt_linuxkm_fpu_states[processor_id] & WC_FPU_COUNT_MASK) == 0U) - { - pr_err("restore_vector_registers_x86 called for cpu id %d " - "without saved context.\n", processor_id); - return; - } - - if ((--wolfcrypt_linuxkm_fpu_states[processor_id] & WC_FPU_COUNT_MASK) > 0U) { - preempt_enable(); /* preempt_disable count may still be nonzero - * after this decrement, but any remaining - * count(s) aren't ours. 
- */ - return; - } - - if (wolfcrypt_linuxkm_fpu_states[processor_id] == 0U) { - kernel_fpu_end(); - } else { - preempt_enable(); /* preempt_disable count will still be nonzero - * after this decrement. - */ - wolfcrypt_linuxkm_fpu_states[processor_id] = 0U; - } -#endif /* !LINUXKM_SIMD_IRQ */ + return 0; +} +void restore_vector_registers_x86(void) +{ + struct wc_thread_fpu_count_ent *pstate = wc_linuxkm_fpu_state_assoc(0); + if (pstate == NULL) { + pr_err("restore_vector_registers_x86 called by pid %d " + "with no saved state.\n", task_pid_nr(current)); return; } + + if ((--pstate->fpu_state & WC_FPU_COUNT_MASK) > 0U) { + return; + } + + if (pstate->fpu_state == 0U) + kernel_fpu_end(); + else + pstate->fpu_state = 0U; +#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) + migrate_enable(); +#endif + + wc_linuxkm_fpu_state_free(pstate); + + return; +} #endif /* WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS && CONFIG_X86 */ #if defined(__PIE__) && (LINUX_VERSION_CODE >= KERNEL_VERSION(6, 1, 0)) diff --git a/linuxkm/linuxkm_wc_port.h b/linuxkm/linuxkm_wc_port.h index 8b839161c..bc17aa57f 100644 --- a/linuxkm/linuxkm_wc_port.h +++ b/linuxkm/linuxkm_wc_port.h @@ -24,6 +24,12 @@ #ifndef LINUXKM_WC_PORT_H #define LINUXKM_WC_PORT_H + #include + + #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) + #error Unsupported kernel. + #endif + #ifdef HAVE_CONFIG_H #ifndef PACKAGE_NAME #error wc_port.h included before config.h @@ -59,6 +65,23 @@ (int)_xatoi_res; \ }) + /* Kbuild+gcc on x86 doesn't consistently honor the default ALIGN16 on stack objects, + * but gives adequate alignment with "32". + */ + #if defined(CONFIG_X86) && !defined(ALIGN16) + #define ALIGN16 __attribute__ ( (aligned (32))) + #endif + + /* kvmalloc()/kvfree() and friends added in linux commit a7c3e901 */ + #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0) + #define HAVE_KVMALLOC + #endif + + /* kernel printf doesn't implement fp. 
*/ + #ifndef WOLFSSL_NO_FLOAT_FMT + #define WOLFSSL_NO_FLOAT_FMT + #endif + #ifdef BUILDING_WOLFSSL #if defined(CONFIG_MIPS) && defined(HAVE_LINUXKM_PIE_SUPPORT) @@ -95,7 +118,6 @@ #include #include - #include #include #include #include @@ -124,6 +146,8 @@ #ifndef CONFIG_X86 #error X86 SIMD extensions requested, but CONFIG_X86 is not set. #endif + #define WOLFSSL_LINUXKM_SIMD + #define WOLFSSL_LINUXKM_SIMD_X86 #ifndef WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS #define WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS #endif @@ -133,6 +157,8 @@ #if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64) #error ARM SIMD extensions requested, but CONFIG_ARM* is not set. #endif + #define WOLFSSL_LINUXKM_SIMD + #define WOLFSSL_LINUXKM_SIMD_ARM #ifndef WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS #define WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS #endif @@ -142,26 +168,17 @@ #endif #endif + /* benchmarks.c uses floating point math, so needs a working SAVE_VECTOR_REGISTERS(). */ + #if defined(WOLFSSL_LINUXKM_BENCHMARKS) && !defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS) + #define WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS + #endif + #if defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86) - #define WOLFSSL_LINUXKM_SIMD - #define WOLFSSL_LINUXKM_SIMD_X86 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) #include #else #include #endif - #ifdef LINUXKM_SIMD_IRQ - #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) - #include - #endif - #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) - #error LINUXKM_SIMD_IRQ is unavailable on linux >= 5.16 (missing exports around fpregs) - /* - * #include - * #include - */ - #endif - #endif #ifndef SAVE_VECTOR_REGISTERS #define SAVE_VECTOR_REGISTERS(fail_clause) { int _svr_ret = save_vector_registers_x86(); if (_svr_ret != 0) { fail_clause } } #endif @@ -169,12 +186,7 @@ #define RESTORE_VECTOR_REGISTERS() restore_vector_registers_x86() #endif #elif defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS) && (defined(CONFIG_ARM) || 
defined(CONFIG_ARM64)) - #define WOLFSSL_LINUXKM_SIMD - #define WOLFSSL_LINUXKM_SIMD_ARM #include - #ifdef LINUXKM_SIMD_IRQ - #error LINUXKM_SIMD_IRQ is unavailable on ARM (not implemented) - #endif #ifndef SAVE_VECTOR_REGISTERS #define SAVE_VECTOR_REGISTERS(fail_clause) { int _svr_ret = save_vector_registers_arm(); if (_svr_ret != 0) { fail_clause } } #endif @@ -195,11 +207,6 @@ #define NO_THREAD_LS #define NO_ATTRIBUTE_CONSTRUCTOR - /* kvmalloc()/kvfree() and friends added in linux commit a7c3e901 */ - #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0) - #define HAVE_KVMALLOC - #endif - #ifdef HAVE_FIPS extern int wolfCrypt_FIPS_first(void); extern int wolfCrypt_FIPS_last(void); @@ -215,7 +222,7 @@ #endif #if defined(__PIE__) && !defined(USE_WOLFSSL_LINUXKM_PIE_REDIRECT_TABLE) - #error "compiling -fPIE without PIE support." + #error "compiling -fPIE requires PIE redirect table." #endif #if defined(HAVE_FIPS) && !defined(HAVE_LINUXKM_PIE_SUPPORT) @@ -307,42 +314,37 @@ struct task_struct *(*get_current)(void); int (*preempt_count)(void); - #ifdef WOLFSSL_LINUXKM_SIMD_X86 - typeof(irq_fpu_usable) *irq_fpu_usable; - /* kernel_fpu_begin() replaced by kernel_fpu_begin_mask() in commit e4512289, - * released in kernel 5.11, backported to 5.4.93 - */ - #ifdef kernel_fpu_begin - typeof(kernel_fpu_begin_mask) *kernel_fpu_begin_mask; - #else - typeof(kernel_fpu_begin) *kernel_fpu_begin; - #endif - typeof(kernel_fpu_end) *kernel_fpu_end; + #ifdef WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS - #ifdef LINUXKM_SIMD_IRQ - #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 14, 0) - typeof(copy_fpregs_to_fpstate) *copy_fpregs_to_fpstate; - typeof(copy_kernel_to_fpregs) *copy_kernel_to_fpregs; - #elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) - typeof(save_fpregs_to_fpstate) *save_fpregs_to_fpstate; - typeof(__restore_fpregs_from_fpstate) *__restore_fpregs_from_fpstate; - typeof(xfeatures_mask_all) *xfeatures_mask_all; - /* - * #else - * typeof(save_fpregs_to_fpstate) 
*save_fpregs_to_fpstate; - * typeof(restore_fpregs_from_fpstate) *restore_fpregs_from_fpstate; - * typeof(fpu_kernel_cfg) *fpu_kernel_cfg; - */ + #if LINUX_VERSION_CODE < KERNEL_VERSION(6, 2, 0) + typeof(cpu_number) *cpu_number; + #else + typeof(pcpu_hot) *pcpu_hot; #endif - #endif - #if LINUX_VERSION_CODE < KERNEL_VERSION(6, 2, 0) - typeof(cpu_number) *cpu_number; - #else - typeof(pcpu_hot) *pcpu_hot; - #endif - typeof(nr_cpu_ids) *nr_cpu_ids; + typeof(nr_cpu_ids) *nr_cpu_ids; - #endif /* WOLFSSL_LINUXKM_SIMD_X86 */ + #if defined(CONFIG_SMP) && (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) + /* note the current and needed version of these were added in af449901b8 (2020-Sep-17) */ + typeof(migrate_disable) *migrate_disable; + typeof(migrate_enable) *migrate_enable; + #endif + + #ifdef CONFIG_X86 + typeof(irq_fpu_usable) *irq_fpu_usable; + /* kernel_fpu_begin() replaced by kernel_fpu_begin_mask() in commit e4512289, + * released in kernel 5.11, backported to 5.4.93 + */ + #ifdef kernel_fpu_begin + typeof(kernel_fpu_begin_mask) *kernel_fpu_begin_mask; + #else + typeof(kernel_fpu_begin) *kernel_fpu_begin; + #endif + typeof(kernel_fpu_end) *kernel_fpu_end; + #else /* !CONFIG_X86 */ + #error WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS is set for an unsupported architecture. 
+ #endif /* arch */ + + #endif /* WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS */ typeof(__mutex_init) *__mutex_init; #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) @@ -452,37 +454,31 @@ #undef preempt_count #define preempt_count (wolfssl_linuxkm_get_pie_redirect_table()->preempt_count) - #ifdef WOLFSSL_LINUXKM_SIMD_X86 - #define irq_fpu_usable (wolfssl_linuxkm_get_pie_redirect_table()->irq_fpu_usable) - #ifdef kernel_fpu_begin - #define kernel_fpu_begin_mask (wolfssl_linuxkm_get_pie_redirect_table()->kernel_fpu_begin_mask) - #else - #define kernel_fpu_begin (wolfssl_linuxkm_get_pie_redirect_table()->kernel_fpu_begin) - #endif - #define kernel_fpu_end (wolfssl_linuxkm_get_pie_redirect_table()->kernel_fpu_end) - #ifdef LINUXKM_SIMD_IRQ - #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 14, 0) - #define copy_fpregs_to_fpstate (wolfssl_linuxkm_get_pie_redirect_table()->copy_fpregs_to_fpstate) - #define copy_kernel_to_fpregs (wolfssl_linuxkm_get_pie_redirect_table()->copy_kernel_to_fpregs) - #elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) - #define save_fpregs_to_fpstate (wolfssl_linuxkm_get_pie_redirect_table()->save_fpregs_to_fpstate) - #define __restore_fpregs_from_fpstate (wolfssl_linuxkm_get_pie_redirect_table()->__restore_fpregs_from_fpstate) - #define xfeatures_mask_all (*(wolfssl_linuxkm_get_pie_redirect_table()->xfeatures_mask_all)) - /* - * #else - * #define save_fpregs_to_fpstate (wolfssl_linuxkm_get_pie_redirect_table()->save_fpregs_to_fpstate) - * #define restore_fpregs_from_fpstate (wolfssl_linuxkm_get_pie_redirect_table()->restore_fpregs_from_fpstate) - * #define fpu_kernel_cfg (*(wolfssl_linuxkm_get_pie_redirect_table()->fpu_kernel_cfg)) - */ - #endif - #endif + #ifdef WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS #if LINUX_VERSION_CODE < KERNEL_VERSION(6, 2, 0) #define cpu_number (*(wolfssl_linuxkm_get_pie_redirect_table()->cpu_number)) #else #define pcpu_hot (*(wolfssl_linuxkm_get_pie_redirect_table()->pcpu_hot)) #endif #define nr_cpu_ids 
(*(wolfssl_linuxkm_get_pie_redirect_table()->nr_cpu_ids)) - #endif + + #if defined(CONFIG_SMP) && (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) + #define migrate_disable (*(wolfssl_linuxkm_get_pie_redirect_table()->migrate_disable)) + #define migrate_enable (*(wolfssl_linuxkm_get_pie_redirect_table()->migrate_enable)) + #endif + + #ifdef CONFIG_X86 + #define irq_fpu_usable (wolfssl_linuxkm_get_pie_redirect_table()->irq_fpu_usable) + #ifdef kernel_fpu_begin + #define kernel_fpu_begin_mask (wolfssl_linuxkm_get_pie_redirect_table()->kernel_fpu_begin_mask) + #else + #define kernel_fpu_begin (wolfssl_linuxkm_get_pie_redirect_table()->kernel_fpu_begin) + #endif + #define kernel_fpu_end (wolfssl_linuxkm_get_pie_redirect_table()->kernel_fpu_end) + #else /* !CONFIG_X86 */ + #error WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS is set for an unsupported architecture. + #endif /* archs */ + #endif /* WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS */ #define __mutex_init (wolfssl_linuxkm_get_pie_redirect_table()->__mutex_init) #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) @@ -515,9 +511,9 @@ #endif /* USE_WOLFSSL_LINUXKM_PIE_REDIRECT_TABLE */ -#ifdef WOLFSSL_LINUXKM_SIMD +#ifdef WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS -#ifdef WOLFSSL_LINUXKM_SIMD_X86 +#ifdef CONFIG_X86 extern __must_check int allocate_wolfcrypt_linuxkm_fpu_states(void); extern void free_wolfcrypt_linuxkm_fpu_states(void); @@ -547,7 +543,7 @@ #endif -#endif /* WOLFSSL_LINUXKM_SIMD */ +#endif /* WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS */ /* remove this multifariously conflicting macro, picked up from * Linux arch//include/asm/current.h. @@ -556,22 +552,6 @@ #undef current #endif - /* prevent gcc's mm_malloc.h from being included, since it unconditionally - * includes stdlib.h, which is kernel-incompatible. 
- */ - #define _MM_MALLOC_H_INCLUDED - - #ifdef HAVE_KVMALLOC - #define malloc(x) kvmalloc_node(x, GFP_KERNEL, NUMA_NO_NODE) - #define free(x) kvfree(x) - void *lkm_realloc(void *ptr, size_t newsize); - #define realloc(x, y) lkm_realloc(x, y) - #else - #define malloc(x) kmalloc(x, GFP_KERNEL) - #define free(x) kfree(x) - #define realloc(x,y) krealloc(x, y, GFP_KERNEL) - #endif - /* min() and max() in linux/kernel.h over-aggressively type-check, producing * myriad spurious -Werrors throughout the codebase. */ @@ -618,9 +598,41 @@ #include typedef struct mutex wolfSSL_Mutex; - #define XMALLOC(s, h, t) ({(void)(h); (void)(t); kmalloc(s, GFP_KERNEL);}) - #define XFREE(p, h, t) ({void* _xp; (void)(h); _xp = (p); if(_xp) kfree(_xp);}) - #define XREALLOC(p, n, h, t) ({(void)(h); (void)(t); krealloc((p), (n), GFP_KERNEL);}) + /* prevent gcc's mm_malloc.h from being included, since it unconditionally + * includes stdlib.h, which is kernel-incompatible. + */ + #define _MM_MALLOC_H_INCLUDED + + /* fun fact: since linux commit 59bb47985c, kmalloc with power-of-2 size is + * aligned to the size. 
+ */ + /* round x up to the nearest power of 2. sizes 0 and 1, exact powers of 2, + * and sizes too large to round without overflow are returned unchanged; + * __builtin_clzl() is never evaluated on 0, for which it is undefined. + */ + #define WC_LINUXKM_ROUND_UP_P_OF_2(x) ( \ + { \ + size_t _alloc_sz = (x); \ + if (_alloc_sz > 1UL) { \ + unsigned int _lz = (unsigned int)__builtin_clzl(_alloc_sz - 1UL); \ + if (_lz != 0U) \ + _alloc_sz = 1UL << ((sizeof(_alloc_sz) * 8UL) - _lz); \ + } \ + _alloc_sz; \ + }) + #ifdef HAVE_KVMALLOC + #define malloc(size) kvmalloc_node(WC_LINUXKM_ROUND_UP_P_OF_2(size), GFP_KERNEL, NUMA_NO_NODE) + #define free(ptr) kvfree(ptr) + void *lkm_realloc(void *ptr, size_t newsize); + #define realloc(ptr, newsize) lkm_realloc(ptr, WC_LINUXKM_ROUND_UP_P_OF_2(newsize)) + #else + #define malloc(size) kmalloc(WC_LINUXKM_ROUND_UP_P_OF_2(size), GFP_KERNEL) + #define free(ptr) kfree(ptr) + #define realloc(ptr, newsize) krealloc(ptr, WC_LINUXKM_ROUND_UP_P_OF_2(newsize), GFP_KERNEL) + #endif + +#ifdef WOLFSSL_TRACK_MEMORY + #include + #define XMALLOC(s, h, t) ({(void)(h); (void)(t); wolfSSL_Malloc(s);}) + #define XFREE(p, h, t) ({void* _xp; (void)(h); _xp = (p); if(_xp) wolfSSL_Free(_xp);}) + #define XREALLOC(p, n, h, t) ({(void)(h); (void)(t); wolfSSL_Realloc(p, n);}) +#else + #define XMALLOC(s, h, t) ({(void)(h); (void)(t); malloc(s);}) + #define XFREE(p, h, t) ({void* _xp; (void)(h); _xp = (p); if(_xp) free(_xp);}) + #define XREALLOC(p, n, h, t) ({(void)(h); (void)(t); realloc(p, n);}) +#endif #include diff --git a/linuxkm/module_hooks.c b/linuxkm/module_hooks.c index 3f17217d3..fb7f11420 100644 --- a/linuxkm/module_hooks.c +++ b/linuxkm/module_hooks.c @@ -113,6 +113,15 @@ static void lkmFipsCb(int ok, int err, const char* hash) static int updateFipsHash(void); #endif +#ifdef WOLFSSL_LINUXKM_BENCHMARKS +#undef HAVE_PTHREAD +#define STRING_USER +#define NO_MAIN_FUNCTION +#define current_time benchmark_current_time +#define WOLFSSL_NO_FLOAT_FMT +#include "wolfcrypt/benchmark/benchmark.c" +#endif /* WOLFSSL_LINUXKM_BENCHMARKS */ + #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) static int __init wolfssl_init(void) #else @@ -202,10 +211,9 @@ static int wolfssl_init(void) * the true module start address, which is potentially useful to an * attacker. 
*/ - pr_info("wolfCrypt container hashes (spans): %x (%lu) %x (%lu), text base %pK, ro base %pK\n", + pr_info("wolfCrypt container hashes (spans): text 0x%x (%lu), rodata 0x%x (%lu)\n", text_hash, pie_text_end-pie_text_start, - rodata_hash, pie_rodata_end-pie_rodata_start, - THIS_MODULE_TEXT_BASE, THIS_MODULE_RO_BASE); + rodata_hash, pie_rodata_end-pie_rodata_start); } #endif /* HAVE_LINUXKM_PIE_SUPPORT */ @@ -277,6 +285,10 @@ static int wolfssl_init(void) pr_info("wolfCrypt self-test passed.\n"); #endif +#ifdef WOLFSSL_LINUXKM_BENCHMARKS + wolfcrypt_benchmark_main(0, (char**)NULL); +#endif + #ifdef WOLFCRYPT_ONLY pr_info("wolfCrypt " LIBWOLFSSL_VERSION_STRING " loaded%s" ".\nSee https://www.wolfssl.com/ for more information.\n" @@ -334,15 +346,6 @@ static int my_preempt_count(void) { return preempt_count(); } -#if defined(WOLFSSL_LINUXKM_SIMD_X86) && (LINUX_VERSION_CODE < KERNEL_VERSION(5, 14, 0)) -static int my_copy_fpregs_to_fpstate(struct fpu *fpu) { - return copy_fpregs_to_fpstate(fpu); -} -static void my_copy_kernel_to_fpregs(union fpregs_state *fpstate) { - copy_kernel_to_fpregs(fpstate); -} -#endif - static int set_up_wolfssl_linuxkm_pie_redirect_table(void) { memset( &wolfssl_linuxkm_pie_redirect_table, @@ -430,6 +433,20 @@ static int set_up_wolfssl_linuxkm_pie_redirect_table(void) { wolfssl_linuxkm_pie_redirect_table.get_current = my_get_current_thread; wolfssl_linuxkm_pie_redirect_table.preempt_count = my_preempt_count; +#ifdef WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS + + #if LINUX_VERSION_CODE < KERNEL_VERSION(6, 2, 0) + wolfssl_linuxkm_pie_redirect_table.cpu_number = &cpu_number; + #else + wolfssl_linuxkm_pie_redirect_table.pcpu_hot = &pcpu_hot; + #endif + wolfssl_linuxkm_pie_redirect_table.nr_cpu_ids = &nr_cpu_ids; + + #if defined(CONFIG_SMP) && (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) + wolfssl_linuxkm_pie_redirect_table.migrate_disable = &migrate_disable; + wolfssl_linuxkm_pie_redirect_table.migrate_enable = &migrate_enable; + #endif + 
#ifdef WOLFSSL_LINUXKM_SIMD_X86 wolfssl_linuxkm_pie_redirect_table.irq_fpu_usable = irq_fpu_usable; #ifdef kernel_fpu_begin @@ -440,29 +457,9 @@ static int set_up_wolfssl_linuxkm_pie_redirect_table(void) { kernel_fpu_begin; #endif wolfssl_linuxkm_pie_redirect_table.kernel_fpu_end = kernel_fpu_end; - #ifdef LINUXKM_SIMD_IRQ - #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 14, 0) - wolfssl_linuxkm_pie_redirect_table.copy_fpregs_to_fpstate = my_copy_fpregs_to_fpstate; - wolfssl_linuxkm_pie_redirect_table.copy_kernel_to_fpregs = my_copy_kernel_to_fpregs; - #elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) - wolfssl_linuxkm_pie_redirect_table.save_fpregs_to_fpstate = save_fpregs_to_fpstate; - wolfssl_linuxkm_pie_redirect_table.__restore_fpregs_from_fpstate = __restore_fpregs_from_fpstate; - wolfssl_linuxkm_pie_redirect_table.xfeatures_mask_all = &xfeatures_mask_all; - /* - * #else - * wolfssl_linuxkm_pie_redirect_table.save_fpregs_to_fpstate = save_fpregs_to_fpstate; - * wolfssl_linuxkm_pie_redirect_table.restore_fpregs_from_fpstate = restore_fpregs_from_fpstate; - * wolfssl_linuxkm_pie_redirect_table.fpu_kernel_cfg = &fpu_kernel_cfg; - */ - #endif - #endif - #if LINUX_VERSION_CODE < KERNEL_VERSION(6, 2, 0) - wolfssl_linuxkm_pie_redirect_table.cpu_number = &cpu_number; - #else - wolfssl_linuxkm_pie_redirect_table.pcpu_hot = &pcpu_hot; - #endif - wolfssl_linuxkm_pie_redirect_table.nr_cpu_ids = &nr_cpu_ids; -#endif +#endif /* WOLFSSL_LINUXKM_SIMD_X86 */ + +#endif /* WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS */ wolfssl_linuxkm_pie_redirect_table.__mutex_init = __mutex_init; #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) diff --git a/src/internal.c b/src/internal.c index 978f89543..c6c22bc01 100644 --- a/src/internal.c +++ b/src/internal.c @@ -33329,7 +33329,6 @@ static int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx, ret = DoClientHelloStateless(ssl, input, inOutIdx, helloSz); if (ret != 0 || !ssl->options.dtlsStateful) { int alertType = 
TranslateErrorToAlert(ret); - if (alertType != invalid_alert) if (alertType != invalid_alert) { int err; diff --git a/src/ssl_asn1.c b/src/ssl_asn1.c index 20dbe6758..5c5d90685 100644 --- a/src/ssl_asn1.c +++ b/src/ssl_asn1.c @@ -3653,7 +3653,9 @@ static int wolfssl_asn1_time_to_tm(const WOLFSSL_ASN1_TIME* asnTime, int asn1TimeBufLen; int i = 0; #ifdef XMKTIME - struct tm localTm = {0}; + struct tm localTm; + + XMEMSET(&localTm, 0, sizeof localTm); #endif /* Get the string buffer - fixed array, can't fail. */ diff --git a/wolfcrypt/benchmark/benchmark.c b/wolfcrypt/benchmark/benchmark.c index 5d354d9be..6609a2e20 100644 --- a/wolfcrypt/benchmark/benchmark.c +++ b/wolfcrypt/benchmark/benchmark.c @@ -65,6 +65,35 @@ #include #include +#ifdef WOLFSSL_NO_FLOAT_FMT + #define FLT_FMT "%0ld.%09lu" /* whole part, '.', nine fractional digits; a ',' here would split the value into two CSV fields */ + #define FLT_FMT_PREC "%0ld.%0*lu" + #define FLT_FMT_PREC2 FLT_FMT_PREC + #define FLT_FMT_ARGS(x) (long)(x), ((x) < 0) ? \ + (unsigned long)(-(((x) - (double)(long)(x)) * 1000000000.0)) : \ + (unsigned long)(((x) - (double)(long)(x)) * 1000000000.0) + static const double pow_10_array[] = { 0.0, 1.0, 10.0, 100.0, 1000.0, \ + 10000.0, 100000.0, 1000000.0, \ + 10000000.0, 100000000.0, \ + 1000000000.0 }; + #define FLT_FMT_PREC_ARGS(p, x) \ + (long)(x), \ + p, \ + (x) >= 0.0 ?
\ + (unsigned long int)((((x) - (double)(long)(x)) * \ + pow_10_array[(p)+1]) + 0.5) : \ + (unsigned long int)((((-(x)) - (double)((long)-(x))) * \ + pow_10_array[(p)+1]) + 0.5) + #define FLT_FMT_PREC2_ARGS(w, p, x) FLT_FMT_PREC_ARGS(p, x) +#else + #define FLT_FMT "%f" + #define FLT_FMT_PREC "%.*f" + #define FLT_FMT_PREC2 "%*.*f" + #define FLT_FMT_ARGS(x) x + #define FLT_FMT_PREC_ARGS(p, x) p, x + #define FLT_FMT_PREC2_ARGS(w, p, x) w, p, x +#endif + #ifdef WOLFSSL_ESPIDF #if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C6) #include "driver/gptimer.h" @@ -921,13 +950,16 @@ static const char* bench_desc_words[][15] = { #define BEGIN_INTEL_CYCLES total_cycles = get_intel_cycles(); #define END_INTEL_CYCLES total_cycles = get_intel_cycles() - total_cycles; /* s == size in bytes that 1 count represents, normally BENCH_SIZE */ - #define SHOW_INTEL_CYCLES(b, n, s) \ - (void)XSNPRINTF((b) + XSTRLEN(b), (n) - XSTRLEN(b), " %s = %6.2f\n", \ - bench_result_words1[lng_index][2], \ - count == 0 ? 0 : (double)total_cycles / ((word64)count*(s))) - #define SHOW_INTEL_CYCLES_CSV(b, n, s) \ - (void)XSNPRINTF((b) + XSTRLEN(b), (n) - XSTRLEN(b), "%.6f,\n", \ - count == 0 ? 0 : (double)total_cycles / ((word64)count*(s))) + #define SHOW_INTEL_CYCLES(b, n, s) \ + (void)XSNPRINTF((b) + XSTRLEN(b), (n) - XSTRLEN(b), \ + " %s = " FLT_FMT_PREC2 "\n", \ + bench_result_words1[lng_index][2], \ + FLT_FMT_PREC2_ARGS(6, 2, count == 0 ? 0 : \ + (double)total_cycles / ((word64)count*(s)))) + #define SHOW_INTEL_CYCLES_CSV(b, n, s) \ + (void)XSNPRINTF((b) + XSTRLEN(b), (n) - XSTRLEN(b), FLT_FMT_PREC ",\n", \ + FLT_FMT_PREC_ARGS(6, count == 0 ? 
0 : \ + (double)total_cycles / ((word64)count*(s)))) #elif defined(LINUX_CYCLE_COUNT) #include #include @@ -938,26 +970,27 @@ static const char* bench_desc_words[][15] = { static THREAD_LS_T int cycles = -1; static THREAD_LS_T struct perf_event_attr atr; - #define INIT_CYCLE_COUNTER do { \ - atr.type = PERF_TYPE_HARDWARE; \ - atr.config = PERF_COUNT_HW_CPU_CYCLES; \ - cycles = (int)syscall(__NR_perf_event_open, &atr, 0, -1, -1, 0); \ + #define INIT_CYCLE_COUNTER do { \ + atr.type = PERF_TYPE_HARDWARE; \ + atr.config = PERF_COUNT_HW_CPU_CYCLES; \ + cycles = (int)syscall(__NR_perf_event_open, &atr, 0, -1, -1, 0); \ } while (0); #define BEGIN_INTEL_CYCLES read(cycles, &begin_cycles, sizeof(begin_cycles)); - #define END_INTEL_CYCLES do { \ - read(cycles, &total_cycles, sizeof(total_cycles)); \ - total_cycles = total_cycles - begin_cycles; \ + #define END_INTEL_CYCLES do { \ + read(cycles, &total_cycles, sizeof(total_cycles)); \ + total_cycles = total_cycles - begin_cycles; \ } while (0); /* s == size in bytes that 1 count represents, normally BENCH_SIZE */ - #define SHOW_INTEL_CYCLES(b, n, s) \ - (void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), " %s = %6.2f\n", \ - bench_result_words1[lng_index][2], \ - (float)total_cycles / (count*s)) - #define SHOW_INTEL_CYCLES_CSV(b, n, s) \ - (void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), "%.6f,\n", \ - (float)total_cycles / (count*s)) + #define SHOW_INTEL_CYCLES(b, n, s) \ + (void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), \ + " %s = " FLT_FMT_PREC2 "\n", \ + bench_result_words1[lng_index][2], \ + FLT_FMT_PREC2_ARGS(6, 2, (double)total_cycles / (count*s))) + #define SHOW_INTEL_CYCLES_CSV(b, n, s) \ + (void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), FLT_FMT_PREC ",\n", \ + FLT_FMT_PREC_ARGS(6, (double)total_cycles / (count*s))) #elif defined(SYNERGY_CYCLE_COUNT) #include "hal_data.h" @@ -969,13 +1002,14 @@ static const char* bench_desc_words[][15] = { #define END_INTEL_CYCLES total_cycles = DWT->CYCCNT - begin_cycles; /* s == size in 
bytes that 1 count represents, normally BENCH_SIZE */ - #define SHOW_INTEL_CYCLES(b, n, s) \ - (void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), " %s = %6.2f\n", \ - bench_result_words1[lng_index][2], \ - (float)total_cycles / (count*s)) - #define SHOW_INTEL_CYCLES_CSV(b, n, s) \ - (void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), "%.6f,\n", \ - (float)total_cycles / (count*s)) + #define SHOW_INTEL_CYCLES(b, n, s) \ + (void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), \ + " %s = " FLT_FMT_PREC2 "\n", \ + bench_result_words1[lng_index][2], \ + FLT_FMT_PREC2_ARGS(6, 2, (double)total_cycles / (count*s))) + #define SHOW_INTEL_CYCLES_CSV(b, n, s) \ + (void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), FLT_FMT_PREC ",\n", \ + FLT_FMT_PREC_ARGS(6, (double)total_cycles / (count*s))) #elif defined(WOLFSSL_ESPIDF) static THREAD_LS_T word64 begin_cycles; static THREAD_LS_T word64 total_cycles; @@ -1005,14 +1039,15 @@ static const char* bench_desc_words[][15] = { total_cycles = (get_xtensa_cycles() - begin_cycles); #define SHOW_ESP_CYCLES(b, n, s) \ - (void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), " %s = %6.2f\n", \ - bench_result_words1[lng_index][2], \ - (float)total_cycles / (count*s) \ + (void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), \ + " %s = " FLT_FMT_PREC2 "\n", \ + bench_result_words1[lng_index][2], \ + FLT_FMT_PREC2_ARGS(6, 2, (double)total_cycles / (count*s)) \ ) #define SHOW_ESP_CYCLES_CSV(b, n, s) \ - (void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), "%.6f,\n", \ - (float)total_cycles / (count*s)) + (void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), FLT_FMT_PREC ",\n", \ + FLT_FMT_PREC_ARGS(6, (double)total_cycles / (count*s))) /* xthal_get_ccount_ex() is a single-overflow-tolerant extension to ** the Espressif `unsigned xthal_get_ccount()` which is known to overflow @@ -1283,7 +1318,7 @@ static const char* bench_result_words2[][5] = { } #else - #define BENCH_MAX_PENDING (1) + #define BENCH_MAX_PENDING 1 #define BENCH_ASYNC_GET_DEV(obj) NULL static WC_INLINE int 
bench_async_check(int* ret, void* asyncDev, @@ -1590,14 +1625,16 @@ typedef enum bench_stat_type { for (bstat = bench_stats_head; bstat != NULL; ) { if (bstat->type == BENCH_STAT_SYM) { - printf("%-16s%s %8.3f %s/s\n", bstat->desc, - BENCH_DEVID_GET_NAME(bstat->useDeviceID), bstat->perfsec, + printf("%-16s%s " FLT_FMT_PREC2 " %s/s\n", bstat->desc, + BENCH_DEVID_GET_NAME(bstat->useDeviceID), + FLT_FMT_PREC2_ARGS(8, 3, bstat->perfsec), base2 ? "MB" : "mB"); } else { - printf("%-5s %4d %-9s %s %.3f ops/sec\n", + printf("%-5s %4d %-9s %s " FLT_FMT_PREC " ops/sec\n", bstat->algo, bstat->strength, bstat->desc, - BENCH_DEVID_GET_NAME(bstat->useDeviceID), bstat->perfsec); + BENCH_DEVID_GET_NAME(bstat->useDeviceID), + FLT_FMT_PREC_ARGS(3, bstat->perfsec)); } bstat = bstat->next; @@ -1651,12 +1688,14 @@ typedef enum bench_stat_type { for (i=0; itype == BENCH_STAT_SYM) { - printf("%-16s %8.3f %s/s\n", bstat->desc, bstat->perfsec, + printf("%-16s " FLT_FMT_PREC2 " %s/s\n", bstat->desc, + FLT_FMT_PREC2_ARGS(8, 3, bstat->perfsec), base2 ? 
"MB" : "mB"); } else if (bstat->type == BENCH_STAT_ASYM) { - printf("%-5s %4d %-9s %.3f ops/sec\n", - bstat->algo, bstat->strength, bstat->desc, bstat->perfsec); + printf("%-5s %4d %-9s " FLT_FMT_PREC " ops/sec\n", + bstat->algo, bstat->strength, bstat->desc, + FLT_FMT_PREC_ARGS(3, bstat->perfsec)); } } } @@ -1677,8 +1716,8 @@ static WC_INLINE void bench_stats_start(int* count, double* start) *start = current_time(1); #ifdef WOLFSSL_ESPIDF - ESP_LOGV(TAG, "finish total_cycles = %llu, start=%f", - total_cycles, *start ); + ESP_LOGV(TAG, "finish total_cycles = %llu, start=" FLT_FMT, + total_cycles, FLT_FMT_ARGS(*start) ); BEGIN_ESP_CYCLES #else @@ -1686,6 +1725,14 @@ static WC_INLINE void bench_stats_start(int* count, double* start) #endif } +#ifdef WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS + #define bench_stats_start(count, start) do { \ + SAVE_VECTOR_REGISTERS(pr_err("SAVE_VECTOR_REGISTERS failed for benchmark run."); \ + return; ); \ + bench_stats_start(count, start); \ + } while (0) +#endif + static WC_INLINE int bench_stats_check(double start) { return ((current_time(0) - start) < BENCH_MIN_RUNTIME_SEC); @@ -1785,10 +1832,12 @@ static void bench_stats_sym_finish(const char* desc, int useDeviceID, { double total, persec = 0, blocks = (double)count; const char* blockType; - char msg[__BENCHMARK_MAXIMUM_LINE_LENGTH] = {0}; + char msg[__BENCHMARK_MAXIMUM_LINE_LENGTH]; const char** word = bench_result_words1[lng_index]; static int sym_header_printed = 0; + XMEMSET(msg, 0, sizeof(msg)); + #ifdef WOLFSSL_ESPIDF END_ESP_CYCLES #else @@ -1873,9 +1922,9 @@ static void bench_stats_sym_finish(const char* desc, int useDeviceID, /* note this codepath brings in all the fields from the non-CSV case. 
*/ #ifdef WOLFSSL_ESPIDF #ifdef HAVE_GET_CYCLES - (void)XSNPRINTF(msg, sizeof(msg), "sym,%s,%s,%lu,%f,%f,%lu,", desc, + (void)XSNPRINTF(msg, sizeof(msg), "sym,%s,%s,%lu," FLT_FMT "," FLT_FMT ",%lu,", desc, BENCH_DEVID_GET_NAME(useDeviceID), - bytes_processed, total, persec, + bytes_processed, FLT_FMT_ARGS(total), FLT_FMT_ARGS(persec), (long unsigned int) total_cycles); #else #warning "HAVE_GET_CYCLES should be defined for WOLFSSL_ESPIDF" @@ -1885,20 +1934,24 @@ static void bench_stats_sym_finish(const char* desc, int useDeviceID, #else #ifdef HAVE_GET_CYCLES - (void)XSNPRINTF(msg, sizeof(msg), "sym,%s,%s,%lu,%f,%f,%lu,", desc, + (void)XSNPRINTF(msg, sizeof(msg), + "sym,%s,%s,%lu," FLT_FMT "," FLT_FMT ",%lu,", desc, BENCH_DEVID_GET_NAME(useDeviceID), - bytes_processed, total, persec, total_cycles); + bytes_processed, FLT_FMT_ARGS(total), + FLT_FMT_ARGS(persec), total_cycles); #else - (void)XSNPRINTF(msg, sizeof(msg), "sym,%s,%s,%lu,%f,%f,", desc, + (void)XSNPRINTF(msg, sizeof(msg), + "sym,%s,%s,%lu," FLT_FMT "," FLT_FMT ",", desc, BENCH_DEVID_GET_NAME(useDeviceID), - bytes_processed, total, persec); + bytes_processed, FLT_FMT_ARGS(total), + FLT_FMT_ARGS(persec)); #endif #endif #elif defined(BENCH_DEVID) - (void)XSNPRINTF(msg, sizeof(msg), "%s,%s,%f,", desc, - BENCH_DEVID_GET_NAME(useDeviceID), persec); + (void)XSNPRINTF(msg, sizeof(msg), "%s,%s," FLT_FMT ",", desc, + BENCH_DEVID_GET_NAME(useDeviceID), FLT_FMT_ARGS(persec)); #else - (void)XSNPRINTF(msg, sizeof(msg), "%s,%f,", desc, persec); + (void)XSNPRINTF(msg, sizeof(msg), "%s," FLT_FMT ",", desc, FLT_FMT_ARGS(persec)); #endif #ifdef WOLFSSL_ESPIDF @@ -1914,23 +1967,30 @@ static void bench_stats_sym_finish(const char* desc, int useDeviceID, #ifdef GENERATE_MACHINE_PARSEABLE_REPORT #ifdef HAVE_GET_CYCLES (void)XSNPRINTF(msg, sizeof(msg), - "%-24s%s %5.0f %s %s %5.3f %s, %8.3f %s/s" - ", %lu cycles,", - desc, BENCH_DEVID_GET_NAME(useDeviceID), blocks, blockType, - word[0], total, word[1], persec, blockType, + 
"%-24s%s " FLT_FMT_PREC2 " %s %s " FLT_FMT_PREC2 " %s, " + FLT_FMT_PREC2 " %s/s, %lu cycles,", + desc, BENCH_DEVID_GET_NAME(useDeviceID), + FLT_FMT_PREC2_ARGS(5, 0, blocks), blockType, + word[0], FLT_FMT_PREC2_ARGS(5, 3, total), word[1], + FLT_FMT_PREC2_ARGS(8, 3, persec), blockType, (unsigned long) total_cycles); #else (void)XSNPRINTF(msg, sizeof(msg), - "%-24s%s %5.0f %s %s %5.3f %s, %8.3f %s/s" - ",", - desc, BENCH_DEVID_GET_NAME(useDeviceID), blocks, blockType, - word[0], total, word[1], persec, blockType); + "%-24s%s " FLT_FMT_PREC2 " %s %s " FLT_FMT_PREC2 " %s, " + FLT_FMT_PREC2 " %s/s,", + desc, BENCH_DEVID_GET_NAME(useDeviceID), + FLT_FMT_PREC2_ARGS(5, 0, blocks), blockType, + word[0], FLT_FMT_PREC2_ARGS(5, 3, total), word[1], + FLT_FMT_PREC2_ARGS(8, 3, persec), blockType); #endif /* HAVE_GET_CYCLES */ #else (void)XSNPRINTF(msg, sizeof(msg), - "%-24s%s %5.0f %s %s %5.3f %s, %8.3f %s/s", - desc, BENCH_DEVID_GET_NAME(useDeviceID), blocks, blockType, - word[0], total, word[1], persec, blockType); + "%-24s%s " FLT_FMT_PREC2 " %s %s " FLT_FMT_PREC2 " %s, " + FLT_FMT_PREC2 " %s/s", + desc, BENCH_DEVID_GET_NAME(useDeviceID), + FLT_FMT_PREC2_ARGS(5, 0, blocks), blockType, + word[0], FLT_FMT_PREC2_ARGS(5, 3, total), word[1], + FLT_FMT_PREC2_ARGS(8, 3, persec), blockType); #endif #ifdef WOLFSSL_ESPIDF @@ -1961,6 +2021,8 @@ static void bench_stats_sym_finish(const char* desc, int useDeviceID, (void)useDeviceID; (void)ret; + RESTORE_VECTOR_REGISTERS(); + TEST_SLEEP(); } /* bench_stats_sym_finish */ @@ -1976,9 +2038,11 @@ static void bench_stats_asym_finish_ex(const char* algo, int strength, double total, each = 0, opsSec, milliEach; const char **word = bench_result_words2[lng_index]; const char* kOpsSec = "Ops/Sec"; - char msg[256] = {0}; + char msg[256]; static int asym_header_printed = 0; + XMEMSET(msg, 0, sizeof(msg)); + total = current_time(0) - start; #ifdef LINUX_RUSAGE_UTIME @@ -2034,19 +2098,23 @@ static void bench_stats_asym_finish_ex(const char* algo, int 
strength, #ifdef GENERATE_MACHINE_PARSEABLE_REPORT #ifdef HAVE_GET_CYCLES (void)XSNPRINTF(msg, sizeof(msg), - "asym,%s,%d,%s%s,%.3f,%.3f,%d,%f,%lu,%.6f\n", - algo, strength, desc, desc_extra, milliEach, opsSec, - count, total, (unsigned long) total_cycles, - (double)total_cycles / (double)count); + "asym,%s,%d,%s%s," FLT_FMT_PREC "," FLT_FMT_PREC ",%d," FLT_FMT + ",%lu," FLT_FMT_PREC "\n", + algo, strength, desc, desc_extra, FLT_FMT_PREC_ARGS(3, milliEach), + FLT_FMT_PREC_ARGS(3, opsSec), + count, FLT_FMT_ARGS(total), (unsigned long) total_cycles, + FLT_FMT_PREC_ARGS(6, (double)total_cycles / (double)count)); #else (void)XSNPRINTF(msg, sizeof(msg), - "asym,%s,%d,%s%s,%.3f,%.3f,%d,%f\n", - algo, strength, desc, desc_extra, milliEach, opsSec, - count, total); + "asym,%s,%d,%s%s," FLT_FMT_PREC "," FLT_FMT_PREC ",%d," FLT_FMT "\n", + algo, strength, desc, desc_extra, FLT_FMT_PREC_ARGS(3, milliEach), + FLT_FMT_PREC_ARGS(3, opsSec), + count, FLT_FMT_ARGS(total)); #endif #else - (void)XSNPRINTF(msg, sizeof(msg), "%s,%d,%s%s,%.3f,%.3f,\n", algo, - strength, desc, desc_extra, milliEach, opsSec); + (void)XSNPRINTF(msg, sizeof(msg), "%s,%d,%s%s," FLT_FMT_PREC "," FLT_FMT_PREC ",\n", algo, + strength, desc, desc_extra, FLT_FMT_PREC_ARGS(3, milliEach), + FLT_FMT_PREC_ARGS(3, opsSec)); #endif } /* if (csv_format == 1) */ @@ -2054,25 +2122,28 @@ static void bench_stats_asym_finish_ex(const char* algo, int strength, #ifdef GENERATE_MACHINE_PARSEABLE_REPORT #ifdef HAVE_GET_CYCLES (void)XSNPRINTF(msg, sizeof(msg), - "%-6s %5d %8s%-2s %s %6d %s %5.3f %s, %s %5.3f ms," - " %.3f %s, %lu cycles\n", algo, strength, desc, + "%-6s %5d %8s%-2s %s %6d %s " FLT_FMT_PREC2 " %s, %s " FLT_FMT_PREC2 " ms," + " " FLT_FMT_PREC " %s, %lu cycles\n", algo, strength, desc, desc_extra, BENCH_DEVID_GET_NAME(useDeviceID), - count, word[0], total, word[1], word[2], milliEach, - opsSec, word[3], (unsigned long) total_cycles); + count, word[0], FLT_FMT_PREC2_ARGS(5, 3, total), word[1], word[2], + 
FLT_FMT_PREC2_ARGS(5, 3, milliEach), + FLT_FMT_PREC_ARGS(3, opsSec), word[3], (unsigned long) total_cycles); #else (void)XSNPRINTF(msg, sizeof(msg), - "%-6s %5d %8s%-2s %s %6d %s %5.3f %s, %s %5.3f ms," - " %.3f %s\n", algo, strength, desc, + "%-6s %5d %8s%-2s %s %6d %s " FLT_FMT_PREC2 " %s, %s " FLT_FMT_PREC2 " ms," + " " FLT_FMT_PREC " %s\n", algo, strength, desc, desc_extra, BENCH_DEVID_GET_NAME(useDeviceID), - count, word[0], total, word[1], word[2], milliEach, - opsSec, word[3]); + count, word[0], FLT_FMT_PREC2_ARGS(5, 3, total), word[1], word[2], + FLT_FMT_PREC2_ARGS(5, 3, milliEach), + FLT_FMT_PREC_ARGS(3, opsSec), word[3]); #endif /* HAVE_GET_CYCLES */ #else (void)XSNPRINTF(msg, sizeof(msg), - "%-6s %5d %8s%-2s %s %6d %s %5.3f %s, %s %5.3f ms," - " %.3f %s\n", algo, strength, desc, desc_extra, + "%-6s %5d %8s%-2s %s %6d %s " FLT_FMT_PREC2 " %s, %s " FLT_FMT_PREC2 " ms," + " " FLT_FMT_PREC " %s\n", algo, strength, desc, desc_extra, BENCH_DEVID_GET_NAME(useDeviceID), count, word[0], - total, word[1], word[2], milliEach, opsSec, word[3]); + FLT_FMT_PREC2_ARGS(5, 3, total), word[1], word[2], + FLT_FMT_PREC2_ARGS(5, 3, milliEach), FLT_FMT_PREC_ARGS(3, opsSec), word[3]); #endif } printf("%s", msg); @@ -2094,6 +2165,8 @@ static void bench_stats_asym_finish_ex(const char* algo, int strength, (void)useDeviceID; (void)ret; + RESTORE_VECTOR_REGISTERS(); + TEST_SLEEP(); } /* bench_stats_asym_finish_ex */ @@ -2906,8 +2979,8 @@ int benchmark_init(void) wolfSSL_Debugging_ON(); #endif - printf("%swolfCrypt Benchmark (block bytes %d, min %.1f sec each)\n", - info_prefix, (int)bench_size, BENCH_MIN_RUNTIME_SEC); + printf("%swolfCrypt Benchmark (block bytes %d, min " FLT_FMT_PREC " sec each)\n", + info_prefix, (int)bench_size, FLT_FMT_PREC_ARGS(1, BENCH_MIN_RUNTIME_SEC)); #ifndef GENERATE_MACHINE_PARSEABLE_REPORT if (csv_format == 1) { @@ -3280,7 +3353,7 @@ static void bench_aesgcm_internal(int useDeviceID, int ret = 0, i, count = 0, times, pending = 0; Aes 
enc[BENCH_MAX_PENDING]; #ifdef HAVE_AES_DECRYPT - Aes dec[BENCH_MAX_PENDING]; + Aes dec[BENCH_MAX_PENDING+1]; #endif double start; @@ -3295,9 +3368,6 @@ static void bench_aesgcm_internal(int useDeviceID, /* clear for done cleanup */ XMEMSET(enc, 0, sizeof(enc)); -#ifdef HAVE_AES_DECRYPT - XMEMSET(dec, 0, sizeof(dec)); -#endif #ifdef WOLFSSL_ASYNC_CRYPT if (bench_additional) #endif @@ -3350,6 +3420,8 @@ exit_aes_gcm: start, ret); #ifdef HAVE_AES_DECRYPT + XMEMSET(dec, 0, sizeof(dec)); + /* init keys */ for (i = 0; i < BENCH_MAX_PENDING; i++) { if ((ret = wc_AesInit(&dec[i], HEAP_HINT, @@ -3955,7 +4027,10 @@ static void bench_aesctr_internal(const byte* key, word32 keySz, printf("wc_AesInit failed, ret = %d\n", ret); } - wc_AesSetKeyDirect(&enc, key, keySz, iv, AES_ENCRYPTION); + if ((ret = wc_AesSetKeyDirect(&enc, key, keySz, iv, AES_ENCRYPTION)) < 0) { + printf("wc_AesSetKeyDirect failed, ret = %d\n", ret); + return; + } bench_stats_start(&count, &start); do { @@ -5683,7 +5758,7 @@ exit: #ifdef WOLFSSL_RIPEMD -int bench_ripemd(void) +void bench_ripemd(void) { RipeMd hash; byte digest[RIPEMD_DIGEST_SIZE]; @@ -5693,7 +5768,8 @@ int bench_ripemd(void) if (digest_stream) { ret = wc_InitRipeMd(&hash); if (ret != 0) { - return ret; + printf("wc_InitRipeMd failed, retval %d\n", ret); + return; } bench_stats_start(&count, &start); @@ -5701,12 +5777,14 @@ int bench_ripemd(void) for (i = 0; i < numBlocks; i++) { ret = wc_RipeMdUpdate(&hash, bench_plain, bench_size); if (ret != 0) { - return ret; + printf("wc_RipeMdUpdate failed, retval %d\n", ret); + return; } } ret = wc_RipeMdFinal(&hash, digest); if (ret != 0) { - return ret; + printf("wc_RipeMdFinal failed, retval %d\n", ret); + return; } count += i; @@ -5718,15 +5796,18 @@ int bench_ripemd(void) for (i = 0; i < numBlocks; i++) { ret = wc_InitRipeMd(&hash); if (ret != 0) { - return ret; + printf("wc_InitRipeMd failed, retval %d\n", ret); + return; } ret = wc_RipeMdUpdate(&hash, bench_plain, bench_size); if (ret != 0) { - return ret; + printf("wc_RipeMdUpdate failed, retval %d\n", ret); + return; } ret = wc_RipeMdFinal(&hash, digest); if (ret != 0) { - return
ret; + printf("wc_RipeMdUpdate failed, retval %d\n", ret); + return; } ret = wc_RipeMdFinal(&hash, digest); if (ret != 0) { - return ret; + printf("wc_RipeMdFinal failed, retval %d\n", ret); + return; } } count += i; @@ -5734,7 +5815,7 @@ int bench_ripemd(void) } bench_stats_sym_finish("RIPEMD", 0, count, bench_size, start, ret); - return 0; + return; } #endif @@ -5965,7 +6046,7 @@ exit: #ifndef NO_HMAC static void bench_hmac(int useDeviceID, int type, int digestSz, - byte* key, word32 keySz, const char* label) + const byte* key, word32 keySz, const char* label) { Hmac hmac[BENCH_MAX_PENDING]; double start; @@ -6057,7 +6138,8 @@ exit: void bench_hmac_md5(int useDeviceID) { - byte key[] = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + WOLFSSL_SMALL_STACK_STATIC const byte key[] = { + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b }; bench_hmac(useDeviceID, WC_MD5, WC_MD5_DIGEST_SIZE, key, sizeof(key), @@ -6070,7 +6152,8 @@ void bench_hmac_md5(int useDeviceID) void bench_hmac_sha(int useDeviceID) { - byte key[] = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + WOLFSSL_SMALL_STACK_STATIC const byte key[] = { + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b }; @@ -6084,7 +6167,8 @@ void bench_hmac_sha(int useDeviceID) void bench_hmac_sha224(int useDeviceID) { - byte key[] = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + WOLFSSL_SMALL_STACK_STATIC const byte key[] = { + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b }; @@ -6100,7 +6184,8 @@ void bench_hmac_sha224(int useDeviceID) void bench_hmac_sha256(int useDeviceID) { - byte key[] = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + WOLFSSL_SMALL_STACK_STATIC const byte key[] = { + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 
0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b }; @@ -6115,7 +6200,8 @@ void bench_hmac_sha256(int useDeviceID) void bench_hmac_sha384(int useDeviceID) { - byte key[] = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + WOLFSSL_SMALL_STACK_STATIC const byte key[] = { + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, @@ -6132,7 +6218,8 @@ void bench_hmac_sha384(int useDeviceID) void bench_hmac_sha512(int useDeviceID) { - byte key[] = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + WOLFSSL_SMALL_STACK_STATIC const byte key[] = { + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, @@ -6153,7 +6240,8 @@ void bench_pbkdf2(void) double start; int ret = 0, count = 0; const char* passwd32 = "passwordpasswordpasswordpassword"; - const byte salt32[] = { 0x78, 0x57, 0x8E, 0x5a, 0x5d, 0x63, 0xcb, 0x06, + WOLFSSL_SMALL_STACK_STATIC const byte salt32[] = { + 0x78, 0x57, 0x8E, 0x5a, 0x5d, 0x63, 0xcb, 0x06, 0x78, 0x57, 0x8E, 0x5a, 0x5d, 0x63, 0xcb, 0x06, 0x78, 0x57, 0x8E, 0x5a, 0x5d, 0x63, 0xcb, 0x06, 0x78, 0x57, 0x8E, 0x5a, 0x5d, 0x63, 0xcb, 0x06 }; @@ -6207,14 +6295,27 @@ void bench_siphash(void) #if defined(WOLFSSL_KEY_GEN) static void bench_rsaKeyGen_helper(int useDeviceID, word32 keySz) { +#ifdef WOLFSSL_SMALL_STACK + RsaKey *genKey; +#else RsaKey genKey[BENCH_MAX_PENDING]; +#endif double start; int ret = 0, i, count = 0, times, pending = 0; const long rsa_e_val = WC_RSA_EXPONENT; const char**desc = bench_desc_words[lng_index]; +#ifdef WOLFSSL_SMALL_STACK + genKey = (RsaKey *)XMALLOC(sizeof(*genKey) * BENCH_MAX_PENDING, + HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + if (genKey == NULL) { + printf("bench_rsaKeyGen_helper malloc 
failed\n"); + return; + } +#endif + /* clear for done cleanup */ - XMEMSET(genKey, 0, sizeof(genKey)); + XMEMSET(genKey, 0, sizeof(*genKey) * BENCH_MAX_PENDING); bench_stats_start(&count, &start); do { @@ -6252,6 +6353,10 @@ exit: for (i = 0; i < BENCH_MAX_PENDING; i++) { wc_FreeRsaKey(&genKey[i]); } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(genKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); +#endif } void bench_rsaKeyGen(int useDeviceID) @@ -6292,7 +6397,7 @@ void bench_rsaKeyGen_size(int useDeviceID, word32 keySz) #if defined(WOLFSSL_RSA_VERIFY_INLINE) || defined(WOLFSSL_RSA_PUBLIC_ONLY) #if defined(USE_CERT_BUFFERS_2048) -static unsigned char rsa_2048_sig[] = { +static const unsigned char rsa_2048_sig[] = { 0x8c, 0x9e, 0x37, 0xbf, 0xc3, 0xa6, 0xba, 0x1c, 0x53, 0x22, 0x40, 0x4b, 0x8b, 0x0d, 0x3c, 0x0e, 0x2e, 0x8c, 0x31, 0x2c, 0x47, 0xbf, 0x03, 0x48, @@ -6327,7 +6432,7 @@ static unsigned char rsa_2048_sig[] = { 0x9e, 0xd2, 0x51, 0xe6, 0x41, 0xbf, 0x4f, 0xa2 }; #elif defined(USE_CERT_BUFFERS_3072) -static unsigned char rsa_3072_sig[] = { +static const unsigned char rsa_3072_sig[] = { 0x1a, 0xd6, 0x0d, 0xfd, 0xe3, 0x41, 0x95, 0x76, 0x27, 0x16, 0x7d, 0xc7, 0x94, 0x16, 0xca, 0xa8, 0x26, 0x08, 0xbe, 0x78, 0x87, 0x72, 0x4c, 0xd9, @@ -6603,7 +6708,11 @@ exit: void bench_rsa(int useDeviceID) { int i; +#ifdef WOLFSSL_SMALL_STACK + RsaKey *rsaKey; +#else RsaKey rsaKey[BENCH_MAX_PENDING]; +#endif int ret = 0; word32 rsaKeySz = 0; const byte* tmp; @@ -6612,6 +6721,15 @@ void bench_rsa(int useDeviceID) word32 idx; #endif +#ifdef WOLFSSL_SMALL_STACK + rsaKey = (RsaKey *)XMALLOC(sizeof(*rsaKey) * BENCH_MAX_PENDING, + HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + if (rsaKey == NULL) { + printf("bench_rsa malloc failed\n"); + return; + } +#endif + #ifdef USE_CERT_BUFFERS_1024 tmp = rsa_key_der_1024; bytes = (size_t)sizeof_rsa_key_der_1024; @@ -6633,7 +6751,7 @@ void bench_rsa(int useDeviceID) #endif /* USE_CERT_BUFFERS */ /* clear for done cleanup */ - XMEMSET(rsaKey, 0, sizeof(rsaKey)); + 
XMEMSET(rsaKey, 0, sizeof(*rsaKey) * BENCH_MAX_PENDING); /* init keys */ for (i = 0; i < BENCH_MAX_PENDING; i++) { @@ -6695,6 +6813,10 @@ exit_bench_rsa: for (i = 0; i < BENCH_MAX_PENDING; i++) { wc_FreeRsaKey(&rsaKey[i]); } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(rsaKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); +#endif } @@ -6703,12 +6825,25 @@ exit_bench_rsa: void bench_rsa_key(int useDeviceID, word32 rsaKeySz) { int ret = 0, i, pending = 0; +#ifdef WOLFSSL_SMALL_STACK + RsaKey *rsaKey; +#else RsaKey rsaKey[BENCH_MAX_PENDING]; +#endif int isPending[BENCH_MAX_PENDING]; long exp = 65537L; +#ifdef WOLFSSL_SMALL_STACK + rsaKey = (RsaKey *)XMALLOC(sizeof(*rsaKey) * BENCH_MAX_PENDING, + HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + if (rsaKey == NULL) { + printf("bench_rsa_key malloc failed\n"); + return; + } +#endif + /* clear for done cleanup */ - XMEMSET(rsaKey, 0, sizeof(rsaKey)); + XMEMSET(rsaKey, 0, sizeof(*rsaKey) * BENCH_MAX_PENDING); XMEMSET(isPending, 0, sizeof(isPending)); /* init keys */ @@ -6750,6 +6885,10 @@ exit_bench_rsa_key: for (i = 0; i < BENCH_MAX_PENDING; i++) { wc_FreeRsaKey(&rsaKey[i]); } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(rsaKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); +#endif } #endif /* WOLFSSL_KEY_GEN */ #endif /* !NO_RSA */ @@ -6785,7 +6924,11 @@ void bench_dh(int useDeviceID) int count = 0, times, pending = 0; const byte* tmp = NULL; double start = 0.0F; +#ifdef WOLFSSL_SMALL_STACK + DhKey *dhKey = NULL; +#else DhKey dhKey[BENCH_MAX_PENDING]; +#endif int dhKeySz = BENCH_DH_KEY_SIZE * 8; /* used in printf */ const char**desc = bench_desc_words[lng_index]; #ifndef NO_ASN @@ -6823,6 +6966,15 @@ void bench_dh(int useDeviceID) WC_INIT_ARRAY(priv, byte, BENCH_MAX_PENDING, BENCH_DH_PRIV_SIZE, HEAP_HINT); +#ifdef WOLFSSL_SMALL_STACK + dhKey = (DhKey *)XMALLOC(sizeof(DhKey) * BENCH_MAX_PENDING, HEAP_HINT, + DYNAMIC_TYPE_TMP_BUFFER); + if (! 
dhKey) { + ret = MEMORY_E; + goto exit; + } +#endif + #ifdef WC_DECLARE_VAR_IS_HEAP_ALLOC if (pub[0] == NULL || pub2 == NULL || agree[0] == NULL || priv[0] == NULL || priv2 == NULL) { ret = MEMORY_E; @@ -6888,7 +7040,12 @@ void bench_dh(int useDeviceID) #endif /* clear for done cleanup */ - XMEMSET(dhKey, 0, sizeof(dhKey)); + XMEMSET(dhKey, 0, sizeof(DhKey) * BENCH_MAX_PENDING); +#if 0 + for (i = 0; i < BENCH_MAX_PENDING; i++) { + XMEMSET(dhKey[i], 0, sizeof(DhKey)); + } +#endif /* init keys */ for (i = 0; i < BENCH_MAX_PENDING; i++) { @@ -6999,9 +7156,18 @@ exit: useDeviceID, count, start, ret); /* cleanup */ +#ifdef WOLFSSL_SMALL_STACK + if (dhKey) { + for (i = 0; i < BENCH_MAX_PENDING; i++) { + wc_FreeDhKey(&dhKey[i]); + } + XFREE(dhKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + } +#else for (i = 0; i < BENCH_MAX_PENDING; i++) { wc_FreeDhKey(&dhKey[i]); } +#endif WC_FREE_ARRAY(pub, BENCH_MAX_PENDING, HEAP_HINT); WC_FREE_VAR(pub2, HEAP_HINT); @@ -7169,16 +7335,29 @@ void bench_eccMakeKey(int useDeviceID, int curveId) int ret = 0, i, times, count, pending = 0; int deviceID; int keySize; +#ifdef WOLFSSL_SMALL_STACK + ecc_key *genKey; +#else ecc_key genKey[BENCH_MAX_PENDING]; +#endif char name[BENCH_ECC_NAME_SZ]; double start; const char**desc = bench_desc_words[lng_index]; +#ifdef WOLFSSL_SMALL_STACK + genKey = (ecc_key *)XMALLOC(sizeof(*genKey) * BENCH_MAX_PENDING, + HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + if (genKey == NULL) { + printf("bench_eccMakeKey malloc failed\n"); + return; + } +#endif + deviceID = useDeviceID ? 
devId : INVALID_DEVID; keySize = wc_ecc_get_curve_size_from_id(curveId); /* clear for done cleanup */ - XMEMSET(&genKey, 0, sizeof(genKey)); + XMEMSET(genKey, 0, sizeof(*genKey) * BENCH_MAX_PENDING); /* ECC Make Key */ bench_stats_start(&count, &start); @@ -7221,6 +7400,10 @@ exit: for (i = 0; i < BENCH_MAX_PENDING; i++) { wc_ecc_free(&genKey[i]); } + +#ifdef WOLFSSL_SMALL_STACK + XFREE(genKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); +#endif } @@ -7230,10 +7413,18 @@ void bench_ecc(int useDeviceID, int curveId) int deviceID; int keySize; char name[BENCH_ECC_NAME_SZ]; +#ifdef WOLFSSL_SMALL_STACK + ecc_key *genKey; +#else ecc_key genKey[BENCH_MAX_PENDING]; +#endif #ifdef HAVE_ECC_DHE +#ifdef WOLFSSL_SMALL_STACK + ecc_key *genKey2; +#else ecc_key genKey2[BENCH_MAX_PENDING]; #endif +#endif #if !defined(NO_ASN) && defined(HAVE_ECC_SIGN) #ifdef HAVE_ECC_VERIFY @@ -7257,6 +7448,24 @@ void bench_ecc(int useDeviceID, int curveId) BENCH_MAX_PENDING, MAX_ECC_BYTES, HEAP_HINT); #endif +#ifdef WOLFSSL_SMALL_STACK + genKey = (ecc_key *)XMALLOC(sizeof(*genKey) * BENCH_MAX_PENDING, + HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + if (genKey == NULL) { + printf("bench_ecc malloc failed\n"); + return; + } +#ifdef HAVE_ECC_DHE + genKey2 = (ecc_key *)XMALLOC(sizeof(*genKey2) * BENCH_MAX_PENDING, + HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + if (genKey2 == NULL) { + XFREE(genKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + printf("bench_ecc malloc failed\n"); + return; + } +#endif +#endif + #ifdef HAVE_ECC_DHE WC_INIT_ARRAY(shared, byte, BENCH_MAX_PENDING, MAX_ECC_BYTES, HEAP_HINT); @@ -7269,9 +7478,9 @@ void bench_ecc(int useDeviceID, int curveId) deviceID = useDeviceID ?
devId : INVALID_DEVID; /* clear for done cleanup */ - XMEMSET(&genKey, 0, sizeof(genKey)); + XMEMSET(genKey, 0, sizeof(*genKey) * BENCH_MAX_PENDING); #ifdef HAVE_ECC_DHE - XMEMSET(&genKey2, 0, sizeof(genKey2)); + XMEMSET(genKey2, 0, sizeof(*genKey2) * BENCH_MAX_PENDING); #endif keySize = wc_ecc_get_curve_size_from_id(curveId); @@ -7446,6 +7655,13 @@ exit: #endif } +#ifdef WOLFSSL_SMALL_STACK + XFREE(genKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + #ifdef HAVE_ECC_DHE + XFREE(genKey2, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + #endif +#endif + #ifdef HAVE_ECC_DHE WC_FREE_ARRAY(shared, BENCH_MAX_PENDING, HEAP_HINT); #endif @@ -7468,58 +7684,85 @@ exit: #ifdef HAVE_ECC_ENCRYPT void bench_eccEncrypt(int curveId) { - ecc_key userA, userB; +#define BENCH_ECCENCRYPT_MSG_SIZE 48 +#define BENCH_ECCENCRYPT_OUT_SIZE (BENCH_ECCENCRYPT_MSG_SIZE + \ + WC_SHA256_DIGEST_SIZE + \ + (MAX_ECC_BITS+3)/4 + 2) + word32 outSz = BENCH_ECCENCRYPT_OUT_SIZE; +#ifdef WOLFSSL_SMALL_STACK + ecc_key *userA = NULL, *userB = NULL; + byte *msg = NULL; + byte *out = NULL; + char *name = NULL; +#else + ecc_key userA[1], userB[1]; + byte msg[BENCH_ECCENCRYPT_MSG_SIZE]; + byte out[BENCH_ECCENCRYPT_OUT_SIZE]; + char name[BENCH_ECC_NAME_SZ]; +#endif int keySize; - byte msg[48]; - byte out[sizeof(msg) + WC_SHA256_DIGEST_SIZE + (MAX_ECC_BITS+3)/4 + 2]; - word32 outSz = sizeof(out); word32 bench_plainSz = bench_size; int ret, i, count; double start; const char**desc = bench_desc_words[lng_index]; - char name[BENCH_ECC_NAME_SZ]; + +#ifdef WOLFSSL_SMALL_STACK + userA = (ecc_key *)XMALLOC(sizeof(*userA), + HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + userB = (ecc_key *)XMALLOC(sizeof(*userB), + HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + msg = (byte *)XMALLOC(BENCH_ECCENCRYPT_MSG_SIZE, + HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + out = (byte *)XMALLOC(outSz, + HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + name = (char *)XMALLOC(BENCH_ECC_NAME_SZ, + HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + if ((! userA) || (! userB) || (! msg) || (! 
out) || (! name)) { + printf("bench_eccEncrypt malloc failed\n"); + goto exit; + } +#endif keySize = wc_ecc_get_curve_size_from_id(curveId); - ret = wc_ecc_init_ex(&userA, HEAP_HINT, devId); + ret = wc_ecc_init_ex(userA, HEAP_HINT, devId); if (ret != 0) { printf("wc_ecc_encrypt make key A failed: %d\n", ret); - return; + goto exit; } - ret = wc_ecc_init_ex(&userB, HEAP_HINT, devId); + ret = wc_ecc_init_ex(userB, HEAP_HINT, devId); if (ret != 0) { printf("wc_ecc_encrypt make key B failed: %d\n", ret); - wc_ecc_free(&userA); - return; + goto exit; } #if defined(ECC_TIMING_RESISTANT) && (!defined(HAVE_FIPS) || \ (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION != 2))) && \ !defined(HAVE_SELFTEST) - ret = wc_ecc_set_rng(&userA, &gRng); + ret = wc_ecc_set_rng(userA, &gRng); if (ret != 0) { goto exit; } - ret = wc_ecc_set_rng(&userB, &gRng); + ret = wc_ecc_set_rng(userB, &gRng); if (ret != 0) { goto exit; } #endif - ret = wc_ecc_make_key_ex(&gRng, keySize, &userA, curveId); + ret = wc_ecc_make_key_ex(&gRng, keySize, userA, curveId); #ifdef WOLFSSL_ASYNC_CRYPT - ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_NONE); + ret = wc_AsyncWait(ret, &userA->asyncDev, WC_ASYNC_FLAG_NONE); #endif if (ret != 0) goto exit; - ret = wc_ecc_make_key_ex(&gRng, keySize, &userB, curveId); + ret = wc_ecc_make_key_ex(&gRng, keySize, userB, curveId); #ifdef WOLFSSL_ASYNC_CRYPT - ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_NONE); + ret = wc_AsyncWait(ret, &userB->asyncDev, WC_ASYNC_FLAG_NONE); #endif if (ret != 0) goto exit; - for (i = 0; i < (int)sizeof(msg); i++) { + for (i = 0; i < BENCH_ECCENCRYPT_MSG_SIZE; i++) { msg[i] = (byte)i; } @@ -7527,7 +7770,7 @@ void bench_eccEncrypt(int curveId) do { for (i = 0; i < ntimes; i++) { /* encrypt msg to B */ - ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), + ret = wc_ecc_encrypt(userA, userB, msg, BENCH_ECCENCRYPT_MSG_SIZE, out, &outSz, NULL); if (ret != 0) { printf("wc_ecc_encrypt failed! 
%d\n", ret); @@ -7546,7 +7789,7 @@ exit_enc: do { for (i = 0; i < ntimes; i++) { /* decrypt msg from A */ - ret = wc_ecc_decrypt(&userB, &userA, out, outSz, bench_plain, + ret = wc_ecc_decrypt(userB, userA, out, outSz, bench_plain, &bench_plainSz, NULL); if (ret != 0) { printf("wc_ecc_decrypt failed! %d\n", ret); @@ -7561,8 +7804,25 @@ exit_dec: exit: /* cleanup */ - wc_ecc_free(&userB); - wc_ecc_free(&userA); +#ifdef WOLFSSL_SMALL_STACK + if (userA) { + wc_ecc_free(userA); + XFREE(userA, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + } + if (userB) { + wc_ecc_free(userB); + XFREE(userB, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + } + if (msg) + XFREE(msg, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + if (out) + XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + if (name) + XFREE(name, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); +#else + wc_ecc_free(userB); + wc_ecc_free(userA); +#endif } #endif #endif /* HAVE_ECC */ @@ -7928,7 +8188,7 @@ void bench_eccsiPairGen(void) const char**desc = bench_desc_words[lng_index]; mp_int ssk; ecc_point* pvt; - byte id[] = { 0x01, 0x23, 0x34, 0x45 }; + static const byte id[] = { 0x01, 0x23, 0x34, 0x45 }; int ret; (void)mp_init(&ssk); @@ -7966,7 +8226,7 @@ void bench_eccsiValidate(void) const char**desc = bench_desc_words[lng_index]; mp_int ssk; ecc_point* pvt; - byte id[] = { 0x01, 0x23, 0x34, 0x45 }; + static const byte id[] = { 0x01, 0x23, 0x34, 0x45 }; int valid; int ret; @@ -8006,8 +8266,8 @@ void bench_eccsi(void) const char**desc = bench_desc_words[lng_index]; mp_int ssk; ecc_point* pvt; - byte id[] = { 0x01, 0x23, 0x34, 0x45 }; - byte msg[] = { 0x01, 0x23, 0x34, 0x45 }; + static const byte id[] = { 0x01, 0x23, 0x34, 0x45 }; + static const byte msg[] = { 0x01, 0x23, 0x34, 0x45 }; byte hash[WC_SHA256_DIGEST_SIZE]; byte hashSz = (byte)sizeof(hash); byte sig[257]; @@ -8098,7 +8358,7 @@ void bench_sakkeRskGen(void) int i, count; const char**desc = bench_desc_words[lng_index]; ecc_point* rsk; - byte id[] = { 0x01, 0x23, 0x34, 0x45 }; + static const byte id[] 
= { 0x01, 0x23, 0x34, 0x45 }; int ret; rsk = wc_ecc_new_point(); @@ -8132,7 +8392,7 @@ void bench_sakkeValidate(void) int i, count; const char**desc = bench_desc_words[lng_index]; ecc_point* rsk; - byte id[] = { 0x01, 0x23, 0x34, 0x45 }; + static const byte id[] = { 0x01, 0x23, 0x34, 0x45 }; int valid; int ret; @@ -8168,8 +8428,9 @@ void bench_sakke(void) int i, count; const char**desc = bench_desc_words[lng_index]; ecc_point* rsk; - byte id[] = { 0x01, 0x23, 0x34, 0x45 }; - byte ssv[] = { 0x01, 0x23, 0x34, 0x45 }; + static const byte id[] = { 0x01, 0x23, 0x34, 0x45 }; + static const byte ssv_init[] = { 0x01, 0x23, 0x34, 0x45 }; + byte ssv[sizeof(ssv_init)]; byte derSSV[sizeof(ssv)]; byte auth[257]; word16 authSz = sizeof(auth); @@ -8179,6 +8440,8 @@ void bench_sakke(void) byte* iTable = NULL; word32 iTableLen = 0; + XMEMCPY(ssv, ssv_init, sizeof ssv); + rsk = wc_ecc_new_point(); (void)wc_InitSakkeKey_ex(&genKey, 128, ECC_SAKKE_1, NULL, INVALID_DEVID); (void)wc_MakeSakkeKey(&genKey, &gRng); @@ -8910,8 +9173,18 @@ void bench_sphincsKeySign(byte level, byte optim) (cur_stime - start_stime) / (cur_utime - start_utime); if (stime_utime_ratio > .1) printf("%swarning, " - "excessive system time ratio for %s%s (%.3f%%).\n", - err_prefix, desc, desc_extra, stime_utime_ratio * 100.0); + "excessive system time ratio for %s%s (" FLT_FMT_PREC "%%).\n", + err_prefix, desc, desc_extra, + FLT_FMT_PREC_ARGS(3, stime_utime_ratio * 100.0)); + } + +#elif defined(WOLFSSL_LINUXKM) + + double current_time(int reset) + { + (void)reset; + u64 ns = ktime_get_ns(); + return (double)ns / 1000000000.0; } #else @@ -8983,7 +9256,7 @@ static void print_alg(const char* str, int* line) { const char* const ident = " "; if (*line == 0) { - fputs(ident, stdout); + printf("%s", ident); *line = (int)XSTRLEN(ident); } printf(" %s", str); diff --git a/wolfcrypt/benchmark/benchmark.h b/wolfcrypt/benchmark/benchmark.h index a42b133de..f119fc969 100644 --- a/wolfcrypt/benchmark/benchmark.h +++ 
b/wolfcrypt/benchmark/benchmark.h @@ -81,7 +81,7 @@ void bench_sha3_384(int useDeviceID); void bench_sha3_512(int useDeviceID); void bench_shake128(int useDeviceID); void bench_shake256(int useDeviceID); -int bench_ripemd(void); +void bench_ripemd(void); void bench_cmac(int useDeviceID); void bench_scrypt(void); void bench_hmac_md5(int useDeviceID); diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 1ab87695c..51e1c416f 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -3129,6 +3129,12 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( checkAESNI = 1; } if (haveAESNI) { + #ifdef WOLFSSL_LINUXKM + /* runtime alignment check */ + if ((wc_ptr_t)&aes->key & (wc_ptr_t)0xf) { + return BAD_ALIGN_E; + } + #endif aes->use_aesni = 1; if (iv) XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE); diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c index 4d37a4395..43e09378e 100644 --- a/wolfcrypt/src/random.c +++ b/wolfcrypt/src/random.c @@ -1864,7 +1864,7 @@ int wc_RNG_GenerateBlock(WC_RNG* rng, byte* output, word32 sz) #ifdef CUSTOM_RAND_GENERATE_BLOCK XMEMSET(output, 0, sz); - ret = CUSTOM_RAND_GENERATE_BLOCK(output, sz); + ret = (int)CUSTOM_RAND_GENERATE_BLOCK(output, sz); #else #ifdef HAVE_HASHDRBG diff --git a/wolfcrypt/src/wc_port.c b/wolfcrypt/src/wc_port.c index b6736e501..ff1a817b0 100644 --- a/wolfcrypt/src/wc_port.c +++ b/wolfcrypt/src/wc_port.c @@ -207,7 +207,7 @@ int wolfCrypt_Init(void) } #endif - #if defined(WOLFSSL_LINUXKM_SIMD_X86) + #ifdef WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS ret = allocate_wolfcrypt_linuxkm_fpu_states(); if (ret != 0) { WOLFSSL_MSG("allocate_wolfcrypt_linuxkm_fpu_states failed"); @@ -466,7 +466,7 @@ int wolfCrypt_Cleanup(void) rpcmem_deinit(); wolfSSL_CleanupHandle(); #endif - #if defined(WOLFSSL_LINUXKM_SIMD_X86) + #ifdef WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS free_wolfcrypt_linuxkm_fpu_states(); #endif @@ -3207,56 +3207,6 @@ char* mystrnstr(const char* s1, const char* s2, unsigned int n) #endif /* WOLFSSL_NUCLEUS_1_2 
*/ -#if defined(WOLFSSL_LINUXKM) && defined(HAVE_KVMALLOC) - /* adapted from kvrealloc() draft by Changli Gao, 2010-05-13 */ - void *lkm_realloc(void *ptr, size_t newsize) { - void *nptr; - size_t oldsize; - - if (unlikely(newsize == 0)) { - kvfree(ptr); - return ZERO_SIZE_PTR; - } - - if (unlikely(ptr == NULL)) - return kvmalloc_node(newsize, GFP_KERNEL, NUMA_NO_NODE); - - if (is_vmalloc_addr(ptr)) { - /* no way to discern the size of the old allocation, - * because the kernel doesn't export find_vm_area(). if - * it did, we could then call get_vm_area_size() on the - * returned struct vm_struct. - */ - return NULL; - } else { -#ifndef __PIE__ - struct page *page; - - page = virt_to_head_page(ptr); - if (PageSlab(page) || PageCompound(page)) { - if (newsize < PAGE_SIZE) -#endif /* ! __PIE__ */ - return krealloc(ptr, newsize, GFP_KERNEL); -#ifndef __PIE__ - oldsize = ksize(ptr); - } else { - oldsize = page->private; - if (newsize <= oldsize) - return ptr; - } -#endif /* ! __PIE__ */ - } - - nptr = kvmalloc_node(newsize, GFP_KERNEL, NUMA_NO_NODE); - if (nptr != NULL) { - memcpy(nptr, ptr, oldsize); - kvfree(ptr); - } - - return nptr; - } -#endif /* WOLFSSL_LINUXKM && HAVE_KVMALLOC */ - #if defined(WOLFSSL_TI_CRYPT) || defined(WOLFSSL_TI_HASH) #include /* initialize and Mutex for TI Crypt Engine */ #include /* md5, sha1, sha224, sha256 */ diff --git a/wolfssl/wolfcrypt/mem_track.h b/wolfssl/wolfcrypt/mem_track.h index 945c10acc..483f14ab7 100644 --- a/wolfssl/wolfcrypt/mem_track.h +++ b/wolfssl/wolfcrypt/mem_track.h @@ -62,15 +62,24 @@ #include "wolfssl/wolfcrypt/settings.h" #include "wolfssl/wolfcrypt/logging.h" +#include "wolfssl/wolfcrypt/memory.h" #if defined(WOLFSSL_TRACK_MEMORY) || defined(HAVE_STACK_SIZE) || \ defined(HAVE_STACK_SIZE_VERBOSE) -#include + #ifdef NO_STDIO_FILESYSTEM + /* if wc_port.h/linuxkm_wc_port.h doesn't define printf, then the user + * needs to define it. + */ + #define wc_mem_printf(...) 
printf(__VA_ARGS__) + #else + #include <stdio.h> + #define wc_mem_printf(...) fprintf(stderr, __VA_ARGS__) + #endif #endif #if defined(WOLFSSL_TRACK_MEMORY) #define DO_MEM_STATS - #if defined(__linux__) || defined(__MACH__) + #if (defined(__linux__) && !defined(WOLFSSL_LINUXKM)) || defined(__MACH__) #define DO_MEM_LIST #endif #endif @@ -160,7 +169,7 @@ static WC_INLINE void* TrackMalloc(size_t sz) #ifdef WOLFSSL_DEBUG_MEMORY #ifdef WOLFSSL_DEBUG_MEMORY_PRINT - fprintf(stderr, "Alloc: %p -> %u at %s:%d\n", header->thisMemory, (word32)sz, func, line); + wc_mem_printf("Alloc: %p -> %u at %s:%d\n", header->thisMemory, (word32)sz, func, line); #else (void)func; (void)line; @@ -268,7 +277,7 @@ static WC_INLINE void TrackFree(void* ptr) #ifdef WOLFSSL_DEBUG_MEMORY #ifdef WOLFSSL_DEBUG_MEMORY_PRINT - fprintf(stderr, "Free: %p -> %u at %s:%d\n", ptr, (word32)sz, func, line); + wc_mem_printf("Free: %p -> %u at %s:%d\n", ptr, (word32)sz, func, line); #else (void)func; (void)line; @@ -334,11 +343,11 @@ static WC_INLINE int InitMemoryTracker(void) ret = wolfSSL_GetAllocators(&mfDefault, &ffDefault, &rfDefault); if (ret < 0) { - fprintf(stderr, "wolfSSL GetAllocators failed to get the defaults\n"); + wc_mem_printf("wolfSSL GetAllocators failed to get the defaults\n"); } ret = wolfSSL_SetAllocators(TrackMalloc, TrackFree, TrackRealloc); if (ret < 0) { - fprintf(stderr, "wolfSSL SetAllocators failed for track memory\n"); + wc_mem_printf("wolfSSL SetAllocators failed for track memory\n"); return ret; } @@ -377,11 +386,11 @@ static WC_INLINE void ShowMemoryTracker(void) #endif #ifdef DO_MEM_STATS - fprintf(stderr, "total Allocs = %9ld\n", ourMemStats.totalAllocs); - fprintf(stderr, "total Deallocs = %9ld\n", ourMemStats.totalDeallocs); - fprintf(stderr, "total Bytes = %9ld\n", ourMemStats.totalBytes); - fprintf(stderr, "peak Bytes = %9ld\n", ourMemStats.peakBytes); - fprintf(stderr, "current Bytes = %9ld\n", ourMemStats.currentBytes); + wc_mem_printf("total Allocs = %9ld\n", 
ourMemStats.totalAllocs); + wc_mem_printf("total Deallocs = %9ld\n", ourMemStats.totalDeallocs); + wc_mem_printf("total Bytes = %9ld\n", ourMemStats.totalBytes); + wc_mem_printf("peak Bytes = %9ld\n", ourMemStats.peakBytes); + wc_mem_printf("current Bytes = %9ld\n", ourMemStats.currentBytes); #endif #ifdef DO_MEM_LIST @@ -389,16 +398,14 @@ static WC_INLINE void ShowMemoryTracker(void) /* print list of allocations */ memHint* header; for (header = ourMemList.head; header != NULL; header = header->next) { - fprintf(stderr, "Leak: Ptr %p, Size %u" #ifdef WOLFSSL_DEBUG_MEMORY - ", Func %s, Line %d" - #endif - "\n", - (byte*)header + sizeof(memHint), (unsigned int)header->thisSize - #ifdef WOLFSSL_DEBUG_MEMORY - , header->func, header->line - #endif - ); + wc_mem_printf("Leak: Ptr %p, Size %u, Func %s, Line %d\n", + (byte*)header + sizeof(memHint), (unsigned int)header->thisSize, + header->func, header->line); +#else + wc_mem_printf("Leak: Ptr %p, Size %u\n", + (byte*)header + sizeof(memHint), (unsigned int)header->thisSize); +#endif } } @@ -538,7 +545,7 @@ int StackSizeHWMReset(void) #define STACK_SIZE_CHECKPOINT_MSG(msg) ({ \ ssize_t HWM = StackSizeHWM_OffsetCorrected(); \ - fprintf(stderr, "%ld\t%s\n", (long int)HWM, msg); \ + wc_mem_printf("%ld\t%s\n", (long int)HWM, msg); \ StackSizeHWMReset(); \ }) @@ -549,7 +556,7 @@ int StackSizeHWMReset(void) printf(" relative stack peak usage = %ld bytes\n", (long int)HWM); \ _ret = StackSizeHWMReset(); \ if ((max >= 0) && (HWM > (ssize_t)(max))) { \ - fprintf(stderr, \ + wc_mem_printf( \ " relative stack usage at %s L%d exceeds designated max %ld bytes.\n", \ __FILE__, __LINE__, (long int)(max)); \ _ret = -1; \ @@ -585,7 +592,7 @@ static WC_INLINE int StackSizeCheck(struct func_args* args, thread_func tf) ret = posix_memalign((void**)&myStack, sysconf(_SC_PAGESIZE), stackSize); if (ret != 0 || myStack == NULL) { - fprintf(stderr, "posix_memalign failed\n"); + wc_mem_printf("posix_memalign failed\n"); return -1; } @@ -593,13 
+600,13 @@ static WC_INLINE int StackSizeCheck(struct func_args* args, thread_func tf) ret = pthread_attr_init(&myAttr); if (ret != 0) { - fprintf(stderr, "attr_init failed\n"); + wc_mem_printf("attr_init failed\n"); return ret; } ret = pthread_attr_setstack(&myAttr, myStack, stackSize); if (ret != 0) { - fprintf(stderr, "attr_setstackaddr failed\n"); + wc_mem_printf("attr_setstackaddr failed\n"); return ret; } @@ -623,7 +630,7 @@ static WC_INLINE int StackSizeCheck(struct func_args* args, thread_func tf) ret = pthread_join(threadId, &status); if (ret != 0) { - fprintf(stderr, "pthread_join failed\n"); + wc_mem_printf("pthread_join failed\n"); return ret; } @@ -672,7 +679,7 @@ static WC_INLINE int StackSizeCheck_launch(struct func_args* args, ret = posix_memalign((void**)&myStack, sysconf(_SC_PAGESIZE), stackSize); if (ret != 0 || myStack == NULL) { - fprintf(stderr, "posix_memalign failed\n"); + wc_mem_printf("posix_memalign failed\n"); free(shim_args); return -1; } @@ -681,7 +688,7 @@ static WC_INLINE int StackSizeCheck_launch(struct func_args* args, ret = pthread_attr_init(&myAttr); if (ret != 0) { - fprintf(stderr, "attr_init failed\n"); + wc_mem_printf("attr_init failed\n"); free(shim_args); free(myStack); return ret; @@ -689,7 +696,7 @@ static WC_INLINE int StackSizeCheck_launch(struct func_args* args, ret = pthread_attr_setstack(&myAttr, myStack, stackSize); if (ret != 0) { - fprintf(stderr, "attr_setstackaddr failed\n"); + wc_mem_printf("attr_setstackaddr failed\n"); } shim_args->myStack = myStack; @@ -721,7 +728,7 @@ static WC_INLINE int StackSizeCheck_reap(pthread_t threadId, void *stack_context void *status; int ret = pthread_join(threadId, &status); if (ret != 0) { - fprintf(stderr, "pthread_join failed\n"); + wc_mem_printf("pthread_join failed\n"); return ret; } @@ -770,12 +777,12 @@ static WC_INLINE void StackTrap(void) { struct rlimit rl; if (getrlimit(RLIMIT_STACK, &rl) != 0) { - fprintf(stderr, "getrlimit failed\n"); + wc_mem_printf("getrlimit 
failed\n"); } printf("rlim_cur = %llu\n", rl.rlim_cur); rl.rlim_cur = 1024*21; /* adjust trap size here */ if (setrlimit(RLIMIT_STACK, &rl) != 0) { - fprintf(stderr, "setrlimit failed\n"); + wc_mem_printf("setrlimit failed\n"); } } diff --git a/wolfssl/wolfcrypt/types.h b/wolfssl/wolfcrypt/types.h index f274baf83..b105028e3 100644 --- a/wolfssl/wolfcrypt/types.h +++ b/wolfssl/wolfcrypt/types.h @@ -598,8 +598,8 @@ typedef struct w64wrapper { #define WC_DECLARE_ARRAY(VAR_NAME, VAR_TYPE, VAR_ITEMS, VAR_SIZE, HEAP) \ VAR_TYPE VAR_NAME[VAR_ITEMS][VAR_SIZE] #define WC_INIT_ARRAY(VAR_NAME, VAR_TYPE, VAR_ITEMS, VAR_SIZE, HEAP) do {} while(0) - #define WC_FREE_VAR(VAR_NAME, HEAP) /* nothing to free, its stack */ - #define WC_FREE_ARRAY(VAR_NAME, VAR_ITEMS, HEAP) /* nothing to free, its stack */ + #define WC_FREE_VAR(VAR_NAME, HEAP) do {} while(0) /* nothing to free, its stack */ + #define WC_FREE_ARRAY(VAR_NAME, VAR_ITEMS, HEAP) do {} while(0) /* nothing to free, its stack */ #define WC_DECLARE_ARRAY_DYNAMIC_DEC(VAR_NAME, VAR_TYPE, VAR_ITEMS, VAR_SIZE, HEAP) \ VAR_TYPE* VAR_NAME[VAR_ITEMS]; \