diff --git a/configure.ac b/configure.ac index e929707e0..6a7985137 100644 --- a/configure.ac +++ b/configure.ac @@ -2993,7 +2993,7 @@ then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_AESNI" if test "$ENABLED_LINUXKM_DEFAULTS" = "yes" then - AM_CFLAGS="$AM_CFLAGS -DWC_AES_C_DYNAMIC_FALLBACK" + AM_CFLAGS="$AM_CFLAGS -DWC_C_DYNAMIC_FALLBACK" fi if test "$CC" != "icc" then @@ -3852,6 +3852,17 @@ then ENABLED_CURVE25519=yes fi + if test "$ENABLED_CURVE25519" = "noasm" + then + AM_CFLAGS="$AM_CFLAGS -DNO_CURVED25519_X64" + fi + + if test "$ENABLED_CURVE25519" = "yes" && test "$ENABLED_LINUXKM_DEFAULTS" = "yes" + then + ENABLED_CURVE25519=noasm + AM_CFLAGS="$AM_CFLAGS -DNO_CURVED25519_X64" + fi + AM_CFLAGS="$AM_CFLAGS -DHAVE_CURVE25519" AM_CCASFLAGS="$AM_CCASFLAGS -DHAVE_CURVE25519" ENABLED_FEMATH=yes @@ -5008,7 +5019,7 @@ AS_CASE([$FIPS_VERSION], AS_IF([test "x$ENABLED_ED25519" != "xyes"], [ENABLED_ED25519="yes"; AM_CFLAGS="$AM_CFLAGS -DHAVE_ED25519 -DHAVE_ED25519_KEY_IMPORT"]) - AS_IF([test "x$ENABLED_CURVE25519" != "xyes"], + AS_IF([test "$ENABLED_CURVE25519" = "no"], [ENABLED_CURVE25519="yes"; AM_CFLAGS="$AM_CFLAGS -DHAVE_CURVE25519"]) AS_IF([test "x$ENABLED_ED448" != "xyes"], @@ -6047,7 +6058,7 @@ then ENABLED_ENCRYPT_THEN_MAC=yes AM_CFLAGS="$AM_CFLAGS -DHAVE_TLS_EXTENSIONS -DHAVE_SNI -DHAVE_MAX_FRAGMENT -DHAVE_TRUNCATED_HMAC -DHAVE_ALPN -DHAVE_TRUSTED_CA" # Check the ECC supported curves prereq - AS_IF([test "x$ENABLED_ECC" != "xno" || test "x$ENABLED_CURVE25519" = "xyes" || test "x$ENABLED_CURVE448" = "xyes" || test "x$ENABLED_TLS13" = "xyes"], + AS_IF([test "x$ENABLED_ECC" != "xno" || test "$ENABLED_CURVE25519" != "no" || test "x$ENABLED_CURVE448" = "xyes" || test "x$ENABLED_TLS13" = "xyes"], [ENABLED_SUPPORTED_CURVES=yes AM_CFLAGS="$AM_CFLAGS -DHAVE_SUPPORTED_CURVES"]) fi @@ -6880,7 +6891,7 @@ then AM_CFLAGS="$AM_CFLAGS -DHAVE_TLS_EXTENSIONS -DHAVE_SNI -DHAVE_MAX_FRAGMENT -DHAVE_TRUNCATED_HMAC" # Check the ECC supported curves prereq - AS_IF([test "x$ENABLED_ECC" != "xno" || test "x$ENABLED_CURVE25519" = "xyes"], + AS_IF([test "x$ENABLED_ECC" != "xno" || test "$ENABLED_CURVE25519" != "no"], [ENABLED_SUPPORTED_CURVES=yes AM_CFLAGS="$AM_CFLAGS -DHAVE_SUPPORTED_CURVES"]) fi @@ -9337,7 +9348,8 @@ AM_CONDITIONAL([BUILD_ED25519],[test "x$ENABLED_ED25519" = "xyes" || test "x$ENA AM_CONDITIONAL([BUILD_ED25519_SMALL],[test "x$ENABLED_ED25519_SMALL" = "xyes" || test "x$ENABLED_USERSETTINGS" = "xyes"]) AM_CONDITIONAL([BUILD_FEMATH], [test "x$ENABLED_FEMATH" = "xyes" || test "x$ENABLED_USERSETTINGS" = "xyes"]) AM_CONDITIONAL([BUILD_GEMATH], [test "x$ENABLED_GEMATH" = "xyes" || test "x$ENABLED_USERSETTINGS" = "xyes"]) -AM_CONDITIONAL([BUILD_CURVE25519],[test "x$ENABLED_CURVE25519" = "xyes" || test "x$ENABLED_USERSETTINGS" = "xyes"]) +AM_CONDITIONAL([BUILD_CURVE25519],[test "$ENABLED_CURVE25519" != "no" || test "x$ENABLED_USERSETTINGS" = "xyes"]) +AM_CONDITIONAL([BUILD_CURVE25519_INTELASM],[test "$ENABLED_CURVE25519" != "noasm" && test "$ENABLED_INTELASM" = "yes"]) AM_CONDITIONAL([BUILD_CURVE25519_SMALL],[test "x$ENABLED_CURVE25519_SMALL" = "xyes" || test "x$ENABLED_USERSETTINGS" = "xyes"]) AM_CONDITIONAL([BUILD_ED448],[test "x$ENABLED_ED448" = "xyes" || test "x$ENABLED_USERSETTINGS" = "xyes"]) AM_CONDITIONAL([BUILD_ED448_SMALL],[test "x$ENABLED_ED448_SMALL" = "xyes"]) diff --git a/linuxkm/linuxkm_memory.c b/linuxkm/linuxkm_memory.c index 9b2f36126..19dd70a84 100644 --- a/linuxkm/linuxkm_memory.c +++ b/linuxkm/linuxkm_memory.c @@ -116,6 +116,10 @@ static union wc_linuxkm_fpu_savebuf { WARN_UNUSED_RESULT int 
allocate_wolfcrypt_linuxkm_fpu_states(void) { if (wc_linuxkm_fpu_states != NULL) { +#ifdef HAVE_FIPS + /* see note below in wc_linuxkm_fpu_state_assoc_unlikely(). */ + return 0; +#else static int warned_for_repeat_alloc = 0; if (! warned_for_repeat_alloc) { pr_err("attempt at repeat allocation" @@ -123,6 +127,7 @@ WARN_UNUSED_RESULT int allocate_wolfcrypt_linuxkm_fpu_states(void) warned_for_repeat_alloc = 1; } return BAD_STATE_E; +#endif } #ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS @@ -225,12 +230,23 @@ static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc(int create_p) static int _warned_on_null = 0; if (wc_linuxkm_fpu_states == NULL) { - if (_warned_on_null == 0) { - pr_err("wc_linuxkm_fpu_state_assoc called by pid %d" - " before allocate_wolfcrypt_linuxkm_fpu_states.\n", my_pid); - _warned_on_null = 1; +#ifdef HAVE_FIPS + /* FIPS needs to use SHA256 for the core verify HMAC, before + * reaching the regular wolfCrypt_Init() logic. to break the + * dependency loop on intelasm builds, we allocate here. + * this is not thread-safe and doesn't need to be. + */ + int ret = allocate_wolfcrypt_linuxkm_fpu_states(); + if (ret != 0) +#endif + { + if (_warned_on_null == 0) { + pr_err("wc_linuxkm_fpu_state_assoc called by pid %d" + " before allocate_wolfcrypt_linuxkm_fpu_states.\n", my_pid); + _warned_on_null = 1; + } + return NULL; } - return NULL; } } @@ -282,12 +298,23 @@ static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc_unlikely(int c static int _warned_on_null = 0; if (wc_linuxkm_fpu_states == NULL) { - if (_warned_on_null == 0) { - pr_err("wc_linuxkm_fpu_state_assoc called by pid %d" - " before allocate_wolfcrypt_linuxkm_fpu_states.\n", my_pid); - _warned_on_null = 1; +#ifdef HAVE_FIPS + /* FIPS needs to use SHA256 for the core verify HMAC, before + * reaching the regular wolfCrypt_Init() logic. to break the + * dependency loop on intelasm builds, we allocate here. + * this is not thread-safe and doesn't need to be. 
+ */ + int ret = allocate_wolfcrypt_linuxkm_fpu_states(); + if (ret != 0) +#endif + { + if (_warned_on_null == 0) { + pr_err("wc_linuxkm_fpu_state_assoc called by pid %d" + " before allocate_wolfcrypt_linuxkm_fpu_states.\n", my_pid); + _warned_on_null = 1; + } + return NULL; } - return NULL; } } @@ -419,6 +446,17 @@ static inline void wc_linuxkm_fpu_state_release( __atomic_store_n(&ent->pid, 0, __ATOMIC_RELEASE); } +WARN_UNUSED_RESULT int can_save_vector_registers_x86(void) +{ + if (irq_fpu_usable()) + return 1; + else if (in_nmi() || (hardirq_count() > 0) || (softirq_count() > 0)) + return 0; + else if (test_thread_flag(TIF_NEED_FPU_LOAD)) + return 1; + return 0; +} + WARN_UNUSED_RESULT int save_vector_registers_x86(void) { #ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS diff --git a/linuxkm/linuxkm_wc_port.h b/linuxkm/linuxkm_wc_port.h index 68875ed34..12dfbde26 100644 --- a/linuxkm/linuxkm_wc_port.h +++ b/linuxkm/linuxkm_wc_port.h @@ -338,6 +338,13 @@ #else #include #endif + #ifndef CAN_SAVE_VECTOR_REGISTERS + #ifdef DEBUG_VECTOR_REGISTER_ACCESS_FUZZING + #define CAN_SAVE_VECTOR_REGISTERS() (can_save_vector_registers_x86() && (SAVE_VECTOR_REGISTERS2_fuzzer() == 0)) + #else + #define CAN_SAVE_VECTOR_REGISTERS() can_save_vector_registers_x86() + #endif + #endif #ifndef SAVE_VECTOR_REGISTERS #define SAVE_VECTOR_REGISTERS(fail_clause) { \ int _svr_ret = save_vector_registers_x86(); \ @@ -369,6 +376,9 @@ #ifndef SAVE_VECTOR_REGISTERS2 #define SAVE_VECTOR_REGISTERS2() save_vector_registers_arm() #endif + #ifndef CAN_SAVE_VECTOR_REGISTERS + #define CAN_SAVE_VECTOR_REGISTERS() can_save_vector_registers_arm() + #endif #ifndef RESTORE_VECTOR_REGISTERS #define RESTORE_VECTOR_REGISTERS() restore_vector_registers_arm() #endif @@ -758,6 +768,7 @@ extern __must_check int allocate_wolfcrypt_linuxkm_fpu_states(void); extern void free_wolfcrypt_linuxkm_fpu_states(void); + extern __must_check int can_save_vector_registers_x86(void); extern __must_check int save_vector_registers_x86(void); extern void restore_vector_registers_x86(void); diff --git a/linuxkm/lkcapi_glue.c b/linuxkm/lkcapi_glue.c index 7fe91afee..4e474227c 100644 --- a/linuxkm/lkcapi_glue.c +++ b/linuxkm/lkcapi_glue.c @@ -1818,7 +1818,7 @@ static int aes_xts_128_test(void) goto out; } -#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_AES_C_DYNAMIC_FALLBACK) +#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); ret = wc_AesXtsEncrypt(aes, buf, p2, sizeof(p2), i2, sizeof(i2)); WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(0); @@ -1843,7 +1843,7 @@ static int aes_xts_128_test(void) goto out; } -#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_AES_C_DYNAMIC_FALLBACK) +#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); ret = wc_AesXtsEncrypt(aes, buf, p1, sizeof(p1), i1, sizeof(i1)); WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(0); @@ -1865,7 +1865,7 @@ static int aes_xts_128_test(void) goto out; } -#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_AES_C_DYNAMIC_FALLBACK) +#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); XMEMSET(cipher, 0, AES_XTS_128_TEST_BUF_SIZ); ret = wc_AesXtsEncrypt(aes, cipher, pp, sizeof(pp), i1, sizeof(i1)); @@ -1891,7 +1891,7 @@ static int aes_xts_128_test(void) goto out; } -#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_AES_C_DYNAMIC_FALLBACK) +#if 
defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); XMEMSET(buf, 0, AES_XTS_128_TEST_BUF_SIZ); ret = wc_AesXtsDecrypt(aes, buf, cipher, sizeof(pp), i1, sizeof(i1)); @@ -1914,7 +1914,7 @@ static int aes_xts_128_test(void) goto out; } -#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_AES_C_DYNAMIC_FALLBACK) +#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); XMEMSET(buf, 0, AES_XTS_128_TEST_BUF_SIZ); ret = wc_AesXtsDecrypt(aes, buf, c1, sizeof(c1), i1, sizeof(i1)); diff --git a/src/include.am b/src/include.am index 20ab4fecf..efc96f245 100644 --- a/src/include.am +++ b/src/include.am @@ -995,7 +995,7 @@ if BUILD_FEMATH if BUILD_CURVE25519_SMALL src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/fe_low_mem.c else -if BUILD_INTELASM +if BUILD_CURVE25519_INTELASM if !BUILD_X86_ASM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/fe_x25519_asm.S endif !BUILD_X86_ASM @@ -1025,7 +1025,7 @@ endif !BUILD_FIPS_V6 else src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/fe_operations.c endif !BUILD_ARMASM -endif !BUILD_INTELASM +endif !BUILD_CURVE25519_INTELASM endif !BUILD_CURVE25519_SMALL endif BUILD_FEMATH @@ -1035,7 +1035,7 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/ge_low_mem.c else src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/ge_operations.c if !BUILD_FEMATH -if BUILD_INTELASM +if BUILD_CURVE25519_INTELASM if !BUILD_X86_ASM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/fe_x25519_asm.S endif !BUILD_X86_ASM @@ -1061,7 +1061,7 @@ else src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/fe_operations.c endif !BUILD_ARMASM endif !BUILD_FIPS_V6 -endif !BUILD_INTELASM +endif !BUILD_CURVE25519_INTELASM endif !BUILD_FEMATH endif !BUILD_ED25519_SMALL endif BUILD_GEMATH diff --git a/tests/api.c b/tests/api.c index 869698722..347f73339 100644 --- a/tests/api.c +++ b/tests/api.c @@ -63548,7 +63548,8 @@ static int test_wolfSSL_dtls_AEAD_limit(void) #endif #if defined(WOLFSSL_DTLS) && \ - defined(HAVE_IO_TESTS_DEPENDENCIES) && !defined(SINGLE_THREADED) + defined(HAVE_IO_TESTS_DEPENDENCIES) && !defined(SINGLE_THREADED) && \ + !defined(DEBUG_VECTOR_REGISTER_ACCESS_FUZZING) static void test_wolfSSL_dtls_send_ch(WOLFSSL* ssl) { int fd, ret; diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 85a7f3a6f..c4210398e 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -1930,7 +1930,7 @@ static void AesEncrypt_C(Aes* aes, const byte* inBlock, byte* outBlock, word32 t0, t1, t2, t3; const word32* rk; -#ifdef WC_AES_C_DYNAMIC_FALLBACK +#ifdef WC_C_DYNAMIC_FALLBACK rk = aes->key_C_fallback; #else rk = aes->key; @@ -2945,7 +2945,7 @@ static void AesDecrypt_C(Aes* aes, const byte* inBlock, byte* outBlock, word32 t0, t1, t2, t3; const word32* rk; -#ifdef WC_AES_C_DYNAMIC_FALLBACK +#ifdef WC_C_DYNAMIC_FALLBACK rk = aes->key_C_fallback; #else rk = aes->key; @@ -4085,7 +4085,7 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( */ static void AesSetKey_C(Aes* aes, const byte* key, word32 keySz, int dir) { -#ifdef WC_AES_C_DYNAMIC_FALLBACK +#ifdef WC_C_DYNAMIC_FALLBACK word32* rk = aes->key_C_fallback; #else word32* rk = aes->key; @@ -4246,7 +4246,7 @@ static void AesSetKey_C(Aes* aes, const byte* key, word32 keySz, int dir) if (dir == AES_DECRYPTION) { unsigned int j; -#ifdef WC_AES_C_DYNAMIC_FALLBACK +#ifdef WC_C_DYNAMIC_FALLBACK rk = aes->key_C_fallback; #else rk = aes->key; @@ -4455,11 +4455,11 @@ static void 
AesSetKey_C(Aes* aes, const byte* key, word32 keySz, int dir) if (ret != 0) return ret; -#ifdef WC_AES_C_DYNAMIC_FALLBACK +#ifdef WC_C_DYNAMIC_FALLBACK #ifdef NEED_AES_TABLES AesSetKey_C(aes, userKey, keylen, dir); #endif /* NEED_AES_TABLES */ -#endif /* WC_AES_C_DYNAMIC_FALLBACK */ +#endif /* WC_C_DYNAMIC_FALLBACK */ #ifdef WOLFSSL_AESNI aes->use_aesni = 0; @@ -4488,13 +4488,13 @@ static void AesSetKey_C(Aes* aes, const byte* key, word32 keySz, int dir) if (ret == 0) aes->use_aesni = 1; else { -#ifdef WC_AES_C_DYNAMIC_FALLBACK +#ifdef WC_C_DYNAMIC_FALLBACK ret = 0; #endif } return ret; } else { -#ifdef WC_AES_C_DYNAMIC_FALLBACK +#ifdef WC_C_DYNAMIC_FALLBACK return 0; #else return ret; @@ -4680,7 +4680,7 @@ int wc_AesSetIV(Aes* aes, const byte* iv) #ifdef WOLFSSL_AESNI -#ifdef WC_AES_C_DYNAMIC_FALLBACK +#ifdef WC_C_DYNAMIC_FALLBACK #define VECTOR_REGISTERS_PUSH { \ int orig_use_aesni = aes->use_aesni; \ @@ -12369,7 +12369,7 @@ int wc_AesXtsSetKeyNoInit(XtsAes* aes, const byte* key, word32 len, int dir) #ifdef WOLFSSL_AESNI if (ret == 0) { - /* With WC_AES_C_DYNAMIC_FALLBACK, the main and tweak keys could have + /* With WC_C_DYNAMIC_FALLBACK, the main and tweak keys could have * conflicting _aesni status, but the AES-XTS asm implementations need * them to all be AESNI. If any aren't, disable AESNI on all. */ @@ -12382,7 +12382,7 @@ int wc_AesXtsSetKeyNoInit(XtsAes* aes, const byte* key, word32 len, int dir) (dir == AES_ENCRYPTION_AND_DECRYPTION)) && (aes->aes_decrypt.use_aesni != aes->tweak.use_aesni))) { - #ifdef WC_AES_C_DYNAMIC_FALLBACK + #ifdef WC_C_DYNAMIC_FALLBACK aes->aes.use_aesni = 0; aes->aes_decrypt.use_aesni = 0; aes->tweak.use_aesni = 0; @@ -12392,7 +12392,7 @@ int wc_AesXtsSetKeyNoInit(XtsAes* aes, const byte* key, word32 len, int dir) } #else /* !WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS */ if (aes->aes.use_aesni != aes->tweak.use_aesni) { - #ifdef WC_AES_C_DYNAMIC_FALLBACK + #ifdef WC_C_DYNAMIC_FALLBACK aes->aes.use_aesni = 0; aes->tweak.use_aesni = 0; #else diff --git a/wolfcrypt/src/evp.c b/wolfcrypt/src/evp.c index 681334441..8be624689 100644 --- a/wolfcrypt/src/evp.c +++ b/wolfcrypt/src/evp.c @@ -4275,23 +4275,39 @@ static int wolfssl_evp_digest_pk_final(WOLFSSL_EVP_MD_CTX *ctx, int ret; if (ctx->isHMAC) { - Hmac hmacCopy; - - if (wolfSSL_HmacCopy(&hmacCopy, &ctx->hash.hmac) != WOLFSSL_SUCCESS) +#ifdef WOLFSSL_SMALL_STACK + Hmac *hmacCopy = (Hmac *)XMALLOC(sizeof(Hmac), NULL, DYNAMIC_TYPE_OPENSSL); + if (hmacCopy == NULL) return WOLFSSL_FAILURE; - ret = wc_HmacFinal(&hmacCopy, md) == 0; - wc_HmacFree(&hmacCopy); +#else + Hmac hmacCopy[1]; +#endif + ret = wolfSSL_HmacCopy(hmacCopy, &ctx->hash.hmac); + if (ret == WOLFSSL_SUCCESS) + ret = wc_HmacFinal(hmacCopy, md) == 0; + wc_HmacFree(hmacCopy); +#ifdef WOLFSSL_SMALL_STACK + XFREE(hmacCopy, NULL, DYNAMIC_TYPE_OPENSSL); +#endif return ret; } else { - WOLFSSL_EVP_MD_CTX ctxCopy; - wolfSSL_EVP_MD_CTX_init(&ctxCopy); - - if (wolfSSL_EVP_MD_CTX_copy_ex(&ctxCopy, ctx) != WOLFSSL_SUCCESS) +#ifdef WOLFSSL_SMALL_STACK + WOLFSSL_EVP_MD_CTX *ctxCopy = (WOLFSSL_EVP_MD_CTX *)XMALLOC(sizeof(WOLFSSL_EVP_MD_CTX), NULL, DYNAMIC_TYPE_OPENSSL); + if (ctxCopy == NULL) return WOLFSSL_FAILURE; +#else + WOLFSSL_EVP_MD_CTX ctxCopy[1]; +#endif + wolfSSL_EVP_MD_CTX_init(ctxCopy); - ret = wolfSSL_EVP_DigestFinal(&ctxCopy, md, mdlen); - wolfSSL_EVP_MD_CTX_cleanup(&ctxCopy); + ret = wolfSSL_EVP_MD_CTX_copy_ex(ctxCopy, ctx); + if (ret == WOLFSSL_SUCCESS) + ret = wolfSSL_EVP_DigestFinal(ctxCopy, md, mdlen); + wolfSSL_EVP_MD_CTX_cleanup(ctxCopy); 
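
This evp.c change moves the temporary Hmac and WOLFSSL_EVP_MD_CTX copies off the stack under WOLFSSL_SMALL_STACK, using a one-element array in the default build so the rest of the function handles both variants through the same pointer. A minimal standalone sketch of that pattern (names and sizes here are illustrative, not wolfSSL API):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Stand-in for a large context such as Hmac or WOLFSSL_EVP_MD_CTX. */
    typedef struct { unsigned char state[512]; } big_ctx_t;

    static int finalize_with_copy(const big_ctx_t *src, unsigned char *out)
    {
        int ret = 0;
    #ifdef SMALL_STACK
        big_ctx_t *copy = (big_ctx_t *)malloc(sizeof(*copy)); /* heap copy */
        if (copy == NULL)
            return -1;
    #else
        big_ctx_t copy[1];       /* array-of-one decays to a pointer below */
    #endif
        memcpy(copy, src, sizeof(*copy)); /* finalize the copy, not the original */
        out[0] = copy->state[0];          /* placeholder for the real final step */
    #ifdef SMALL_STACK
        free(copy);                       /* freed on every path past allocation */
    #endif
        return ret;
    }

    int main(void)
    {
        big_ctx_t ctx = {{0x42}};
        unsigned char digest[1];
        printf("%d %02x\n", finalize_with_copy(&ctx, digest), digest[0]);
        return 0;
    }

Because hmacCopy/ctxCopy are one-element arrays in the non-small-stack build, calls such as wolfSSL_HmacCopy(hmacCopy, ...) and wc_HmacFree(hmacCopy) take the same argument in both configurations; only the allocation and the XFREE are conditional.
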
+#ifdef WOLFSSL_SMALL_STACK + XFREE(ctxCopy, NULL, DYNAMIC_TYPE_OPENSSL); +#endif return ret; } } diff --git a/wolfcrypt/src/port/cuda/aes-cuda.cu b/wolfcrypt/src/port/cuda/aes-cuda.cu index 24554a020..0fec5d35c 100644 --- a/wolfcrypt/src/port/cuda/aes-cuda.cu +++ b/wolfcrypt/src/port/cuda/aes-cuda.cu @@ -944,7 +944,7 @@ void AesEncrypt_C(Aes* aes, const byte* inBlock, byte* outBlock, word32* rk_GPU = NULL; cudaError_t ret = cudaSuccess; -#ifdef WC_AES_C_DYNAMIC_FALLBACK +#ifdef WC_C_DYNAMIC_FALLBACK if ( ret == cudaSuccess ) ret = cudaMalloc(&rk_GPU, sizeof(aes->key_C_fallback)); if ( ret == cudaSuccess ) @@ -991,7 +991,7 @@ void AesEncryptBlocks_C(Aes* aes, const byte* in, byte* out, word32 sz) word32* rk_GPU = NULL; cudaError_t ret = cudaSuccess; -#ifdef WC_AES_C_DYNAMIC_FALLBACK +#ifdef WC_C_DYNAMIC_FALLBACK if ( ret == cudaSuccess ) ret = cudaMalloc(&rk_GPU, sizeof(aes->key_C_fallback)); if ( ret == cudaSuccess ) diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index 37a163f28..00c5e9afb 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -203,8 +203,12 @@ on the specific device platform. #if defined(LITTLE_ENDIAN_ORDER) && \ defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) - #define SHA256_UPDATE_REV_BYTES(ctx) \ - (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #ifdef WC_C_DYNAMIC_FALLBACK + #define SHA256_UPDATE_REV_BYTES(ctx) (sha256->sha_method == SHA256_C) + #else + #define SHA256_UPDATE_REV_BYTES(ctx) \ + (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif #elif defined(FREESCALE_MMCAU_SHA) #define SHA256_UPDATE_REV_BYTES(ctx) 0 /* reverse not needed on update */ #else @@ -228,6 +232,15 @@ on the specific device platform. 
(!defined(WOLFSSL_HAVE_PSA) || defined(WOLFSSL_PSA_NO_HASH)) && \ !defined(WOLFSSL_RENESAS_RX64_HASH) +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ + (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) +#ifdef WC_C_DYNAMIC_FALLBACK + #define SHA256_SETTRANSFORM_ARGS int *sha_method +#else + #define SHA256_SETTRANSFORM_ARGS void +#endif +static void Sha256_SetTransform(SHA256_SETTRANSFORM_ARGS); +#endif static int InitSha256(wc_Sha256* sha256) { @@ -253,6 +266,17 @@ static int InitSha256(wc_Sha256* sha256) sha256->used = 0; #endif +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ + (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) + /* choose best Transform function under this runtime environment */ +#ifdef WC_C_DYNAMIC_FALLBACK + sha256->sha_method = 0; + Sha256_SetTransform(&sha256->sha_method); +#else + Sha256_SetTransform(); +#endif +#endif + #ifdef WOLF_CRYPTO_CB sha256->devId = wc_CryptoCb_DefaultDevID(); #endif @@ -371,34 +395,51 @@ static int InitSha256(wc_Sha256* sha256) } /* extern "C" */ #endif - static word32 intel_flags; - static int Transform_Sha256_is_vectorized = 0; + static word32 intel_flags = 0; + +#if defined(WC_C_DYNAMIC_FALLBACK) && !defined(WC_NO_INTERNAL_FUNCTION_POINTERS) + #define WC_NO_INTERNAL_FUNCTION_POINTERS +#endif #ifdef WC_NO_INTERNAL_FUNCTION_POINTERS - static enum { SHA256_UNSET, SHA256_AVX1, SHA256_AVX2, SHA256_AVX1_RORX, - SHA256_AVX2_RORX, SHA256_SSE2, SHA256_C } - sha_method = SHA256_UNSET; + enum sha_methods { SHA256_UNSET = 0, SHA256_AVX1, SHA256_AVX2, + SHA256_AVX1_RORX, SHA256_AVX2_RORX, SHA256_SSE2, + SHA256_C }; - static void Sha256_SetTransform(void) +#ifndef WC_C_DYNAMIC_FALLBACK + static enum sha_methods sha_method = SHA256_UNSET; +#endif + + static void Sha256_SetTransform(SHA256_SETTRANSFORM_ARGS) { - - if (sha_method != SHA256_UNSET) + #ifdef WC_C_DYNAMIC_FALLBACK + #define SHA_METHOD (*sha_method) + #else + #define SHA_METHOD sha_method + #endif + if (SHA_METHOD != SHA256_UNSET) return; - intel_flags = cpuid_get_flags(); + #ifdef WC_C_DYNAMIC_FALLBACK + if (! 
CAN_SAVE_VECTOR_REGISTERS()) { + SHA_METHOD = SHA256_C; + return; + } + #endif + + if (intel_flags == 0) + intel_flags = cpuid_get_flags(); if (IS_INTEL_SHA(intel_flags)) { #ifdef HAVE_INTEL_AVX1 if (IS_INTEL_AVX1(intel_flags)) { - sha_method = SHA256_AVX1; - Transform_Sha256_is_vectorized = 1; + SHA_METHOD = SHA256_AVX1; } else #endif { - sha_method = SHA256_SSE2; - Transform_Sha256_is_vectorized = 1; + SHA_METHOD = SHA256_SSE2; } } else @@ -406,14 +447,12 @@ static int InitSha256(wc_Sha256* sha256) if (IS_INTEL_AVX2(intel_flags)) { #ifdef HAVE_INTEL_RORX if (IS_INTEL_BMI2(intel_flags)) { - sha_method = SHA256_AVX2_RORX; - Transform_Sha256_is_vectorized = 1; + SHA_METHOD = SHA256_AVX2_RORX; } else #endif { - sha_method = SHA256_AVX2; - Transform_Sha256_is_vectorized = 1; + SHA_METHOD = SHA256_AVX2; } } else @@ -422,30 +461,34 @@ static int InitSha256(wc_Sha256* sha256) if (IS_INTEL_AVX1(intel_flags)) { #ifdef HAVE_INTEL_RORX if (IS_INTEL_BMI2(intel_flags)) { - sha_method = SHA256_AVX1_RORX; - Transform_Sha256_is_vectorized = 1; + SHA_METHOD = SHA256_AVX1_RORX; } else #endif { - sha_method = SHA256_AVX1; - Transform_Sha256_is_vectorized = 1; + SHA_METHOD = SHA256_AVX1; } } else #endif { - sha_method = SHA256_C; - Transform_Sha256_is_vectorized = 0; + SHA_METHOD = SHA256_C; } + #undef SHA_METHOD } static WC_INLINE int inline_XTRANSFORM(wc_Sha256* S, const byte* D) { + #ifdef WC_C_DYNAMIC_FALLBACK + #define SHA_METHOD (S->sha_method) + #else + #define SHA_METHOD sha_method + #endif int ret; - if (sha_method == SHA256_C) + + if (SHA_METHOD == SHA256_C) return Transform_Sha256(S, D); SAVE_VECTOR_REGISTERS(return _svr_ret;); - switch (sha_method) { + switch (SHA_METHOD) { case SHA256_AVX2: ret = Transform_Sha256_AVX2(S, D); break; @@ -469,13 +512,19 @@ static int InitSha256(wc_Sha256* sha256) } RESTORE_VECTOR_REGISTERS(); return ret; + #undef SHA_METHOD } #define XTRANSFORM(...) inline_XTRANSFORM(__VA_ARGS__) static WC_INLINE int inline_XTRANSFORM_LEN(wc_Sha256* S, const byte* D, word32 L) { + #ifdef WC_C_DYNAMIC_FALLBACK + #define SHA_METHOD (S->sha_method) + #else + #define SHA_METHOD sha_method + #endif int ret; SAVE_VECTOR_REGISTERS(return _svr_ret;); - switch (sha_method) { + switch (SHA_METHOD) { case SHA256_AVX2: ret = Transform_Sha256_AVX2_Len(S, D, L); break; @@ -499,6 +548,7 @@ static int InitSha256(wc_Sha256* sha256) } RESTORE_VECTOR_REGISTERS(); return ret; + #undef SHA_METHOD } #define XTRANSFORM_LEN(...) inline_XTRANSFORM_LEN(__VA_ARGS__) @@ -510,33 +560,34 @@ static int InitSha256(wc_Sha256* sha256) word32 len); /* = NULL */ static int transform_check = 0; + static int Transform_Sha256_is_vectorized = 0; static WC_INLINE int inline_XTRANSFORM(wc_Sha256* S, const byte* D) { int ret; -#ifdef WOLFSSL_LINUXKM + #ifdef WOLFSSL_LINUXKM if (Transform_Sha256_is_vectorized) SAVE_VECTOR_REGISTERS(return _svr_ret;); -#endif + #endif ret = (*Transform_Sha256_p)(S, D); -#ifdef WOLFSSL_LINUXKM + #ifdef WOLFSSL_LINUXKM if (Transform_Sha256_is_vectorized) RESTORE_VECTOR_REGISTERS(); -#endif + #endif return ret; } #define XTRANSFORM(...) 
inline_XTRANSFORM(__VA_ARGS__) static WC_INLINE int inline_XTRANSFORM_LEN(wc_Sha256* S, const byte* D, word32 L) { int ret; -#ifdef WOLFSSL_LINUXKM + #ifdef WOLFSSL_LINUXKM if (Transform_Sha256_is_vectorized) SAVE_VECTOR_REGISTERS(return _svr_ret;); -#endif + #endif ret = (*Transform_Sha256_Len_p)(S, D, L); -#ifdef WOLFSSL_LINUXKM + #ifdef WOLFSSL_LINUXKM if (Transform_Sha256_is_vectorized) RESTORE_VECTOR_REGISTERS(); -#endif + #endif return ret; } #define XTRANSFORM_LEN(...) inline_XTRANSFORM_LEN(__VA_ARGS__) @@ -632,9 +683,6 @@ static int InitSha256(wc_Sha256* sha256) if (ret != 0) return ret; - /* choose best Transform function under this runtime environment */ - Sha256_SetTransform(); - #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256) ret = wolfAsync_DevCtxInit(&sha256->asyncDev, WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId); @@ -1312,7 +1360,9 @@ static int InitSha256(wc_Sha256* sha256) #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) - #ifdef WC_NO_INTERNAL_FUNCTION_POINTERS + #ifdef WC_C_DYNAMIC_FALLBACK + if (sha256->sha_method != SHA256_C) + #elif defined(WC_NO_INTERNAL_FUNCTION_POINTERS) if (sha_method != SHA256_C) #else if (Transform_Sha256_Len_p != NULL) @@ -1553,8 +1603,12 @@ static int InitSha256(wc_Sha256* sha256) /* Kinetis requires only these bytes reversed */ #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) + #ifdef WC_C_DYNAMIC_FALLBACK + if (sha256->sha_method != SHA256_C) + #else if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags)) #endif + #endif { ByteReverseWords( &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)], @@ -1898,11 +1952,19 @@ static int InitSha256(wc_Sha256* sha256) sha224->loLen = 0; sha224->hiLen = 0; + #ifdef WC_C_DYNAMIC_FALLBACK + sha224->sha_method = 0; + #endif + #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) /* choose best Transform function under this runtime environment */ + #ifdef WC_C_DYNAMIC_FALLBACK + Sha256_SetTransform(&sha224->sha_method); + #else Sha256_SetTransform(); #endif + #endif #ifdef WOLFSSL_HASH_FLAGS sha224->flags = 0; #endif diff --git a/wolfcrypt/src/sha3.c b/wolfcrypt/src/sha3.c index 30732d501..14f304fa3 100644 --- a/wolfcrypt/src/sha3.c +++ b/wolfcrypt/src/sha3.c @@ -66,9 +66,16 @@ word32 cpuid_flags; int cpuid_flags_set = 0; +#ifdef WC_C_DYNAMIC_FALLBACK + #define SHA3_BLOCK (sha3->sha3_block) + #define SHA3_BLOCK_N (sha3->sha3_block_n) +#else void (*sha3_block)(word64 *s) = NULL; void (*sha3_block_n)(word64 *s, const byte* data, word32 n, word64 c) = NULL; + #define SHA3_BLOCK sha3_block + #define SHA3_BLOCK_N sha3_block_n +#endif #endif #ifdef WOLFSSL_SHA3_SMALL @@ -622,15 +629,26 @@ static int InitSha3(wc_Sha3* sha3) if (!cpuid_flags_set) { cpuid_flags = cpuid_get_flags(); cpuid_flags_set = 1; +#ifdef WC_C_DYNAMIC_FALLBACK + } + { + if (! 
CAN_SAVE_VECTOR_REGISTERS()) { + SHA3_BLOCK = BlockSha3; + SHA3_BLOCK_N = NULL; + } + else +#endif if (IS_INTEL_BMI1(cpuid_flags) && IS_INTEL_BMI2(cpuid_flags)) { - sha3_block = sha3_block_bmi2; - sha3_block_n = sha3_block_n_bmi2; + SHA3_BLOCK = sha3_block_bmi2; + SHA3_BLOCK_N = sha3_block_n_bmi2; } else if (IS_INTEL_AVX2(cpuid_flags)) { - sha3_block = sha3_block_avx2; + SHA3_BLOCK = sha3_block_avx2; + SHA3_BLOCK_N = NULL; } else { - sha3_block = BlockSha3; + SHA3_BLOCK = BlockSha3; + SHA3_BLOCK_N = NULL; } } #endif @@ -652,7 +670,7 @@ static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p) word32 blocks; #if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) - if (sha3_block == sha3_block_avx2) + if (SHA3_BLOCK == sha3_block_avx2) SAVE_VECTOR_REGISTERS(return _svr_ret;); #endif if (sha3->i > 0) { @@ -675,7 +693,7 @@ static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p) sha3->s[i] ^= Load64BitBigEndian(sha3->t + 8 * i); } #ifdef USE_INTEL_SPEEDUP - (*sha3_block)(sha3->s); + (*SHA3_BLOCK)(sha3->s); #else BlockSha3(sha3->s); #endif @@ -684,8 +702,8 @@ static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p) } blocks = len / (p * 8); #ifdef USE_INTEL_SPEEDUP - if ((sha3_block_n != NULL) && (blocks > 0)) { - (*sha3_block_n)(sha3->s, data, blocks, p * 8); + if ((SHA3_BLOCK_N != NULL) && (blocks > 0)) { + (*SHA3_BLOCK_N)(sha3->s, data, blocks, p * 8); len -= blocks * (p * 8); data += blocks * (p * 8); blocks = 0; @@ -696,7 +714,7 @@ static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p) sha3->s[i] ^= Load64Unaligned(data + 8 * i); } #ifdef USE_INTEL_SPEEDUP - (*sha3_block)(sha3->s); + (*SHA3_BLOCK)(sha3->s); #else BlockSha3(sha3->s); #endif @@ -704,7 +722,7 @@ static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p) data += p * 8; } #if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) - if (sha3_block == sha3_block_avx2) + if (SHA3_BLOCK == sha3_block_avx2) RESTORE_VECTOR_REGISTERS(); #endif XMEMCPY(sha3->t, data, len); @@ -742,13 +760,13 @@ static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, word32 l) } #if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) - if (sha3_block == sha3_block_avx2) + if (SHA3_BLOCK == sha3_block_avx2) SAVE_VECTOR_REGISTERS(return _svr_ret;); #endif for (j = 0; l - j >= rate; j += rate) { #ifdef USE_INTEL_SPEEDUP - (*sha3_block)(sha3->s); + (*SHA3_BLOCK)(sha3->s); #else BlockSha3(sha3->s); #endif @@ -760,7 +778,7 @@ static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, word32 l) } if (j != l) { #ifdef USE_INTEL_SPEEDUP - (*sha3_block)(sha3->s); + (*SHA3_BLOCK)(sha3->s); #else BlockSha3(sha3->s); #endif @@ -770,7 +788,7 @@ static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, word32 l) XMEMCPY(hash + j, sha3->s, l - j); } #if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) - if (sha3_block == sha3_block_avx2) + if (SHA3_BLOCK == sha3_block_avx2) RESTORE_VECTOR_REGISTERS(); #endif @@ -1338,6 +1356,13 @@ int wc_Shake128_Absorb(wc_Shake* shake, const byte* data, word32 len) return ret; } +#ifdef WC_C_DYNAMIC_FALLBACK + #undef SHA3_BLOCK + #undef SHA3_BLOCK_N + #define SHA3_BLOCK (shake->sha3_block) + #define SHA3_BLOCK_N (shake->sha3_block_n) +#endif + /* Squeeze the state to produce pseudo-random output. * * shake wc_Shake object holding state. 
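
With WC_C_DYNAMIC_FALLBACK the SHA-3 dispatch above moves from the file-scope sha3_block/sha3_block_n pointers into the wc_Sha3 (and wc_Shake) object, and InitSha3 pins a context to the plain C BlockSha3 whenever CAN_SAVE_VECTOR_REGISTERS() reports that SIMD state cannot be saved. A self-contained sketch of the per-context dispatch idea, with made-up names standing in for the wolfSSL ones:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    typedef void (*block_fn)(uint64_t *s);

    static void block_c(uint64_t *s)    { s[0] += 1; } /* portable permutation */
    static void block_simd(uint64_t *s) { s[0] += 2; } /* stands in for BMI2/AVX2 code */

    /* Assumed capability probe; in the kernel-module build this role is
     * played by CAN_SAVE_VECTOR_REGISTERS(). */
    static int can_use_simd(void) { return 0; }

    typedef struct {
        uint64_t s[25];
        block_fn block;     /* chosen once, at init time, for this context */
    } sha3_ctx;

    static void sha3_init(sha3_ctx *ctx)
    {
        memset(ctx->s, 0, sizeof(ctx->s));
        /* Pin the context to the C path if vector state can't be saved in
         * the current execution context (e.g. a kernel interrupt handler). */
        ctx->block = can_use_simd() ? block_simd : block_c;
    }

    static void sha3_process_block(sha3_ctx *ctx)
    {
        (*ctx->block)(ctx->s);   /* later calls never re-probe */
    }

    int main(void)
    {
        sha3_ctx ctx;
        sha3_init(&ctx);
        sha3_process_block(&ctx);
        printf("%llu\n", (unsigned long long)ctx.s[0]);
        return 0;
    }

Storing the choice in the object lets two contexts created in different execution contexts (process vs. interrupt) keep different block implementations for their whole lifetime, which a single pair of global pointers cannot express.
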
@@ -1348,12 +1373,12 @@ int wc_Shake128_Absorb(wc_Shake* shake, const byte* data, word32 len) int wc_Shake128_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt) { #if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) - if (sha3_block == sha3_block_avx2) + if (SHA3_BLOCK == sha3_block_avx2) SAVE_VECTOR_REGISTERS(return _svr_ret;); #endif for (; (blockCnt > 0); blockCnt--) { #ifdef USE_INTEL_SPEEDUP - (*sha3_block)(shake->s); + (*SHA3_BLOCK)(shake->s); #else BlockSha3(shake->s); #endif @@ -1365,7 +1390,7 @@ int wc_Shake128_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt) out += WC_SHA3_128_COUNT * 8; } #if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) - if (sha3_block == sha3_block_avx2) + if (SHA3_BLOCK == sha3_block_avx2) RESTORE_VECTOR_REGISTERS(); #endif @@ -1486,12 +1511,12 @@ int wc_Shake256_Absorb(wc_Shake* shake, const byte* data, word32 len) int wc_Shake256_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt) { #if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) - if (sha3_block == sha3_block_avx2) + if (SHA3_BLOCK == sha3_block_avx2) SAVE_VECTOR_REGISTERS(return _svr_ret;); #endif for (; (blockCnt > 0); blockCnt--) { #ifdef USE_INTEL_SPEEDUP - (*sha3_block)(shake->s); + (*SHA3_BLOCK)(shake->s); #else BlockSha3(shake->s); #endif @@ -1503,7 +1528,7 @@ int wc_Shake256_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt) out += WC_SHA3_256_COUNT * 8; } #if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) - if (sha3_block == sha3_block_avx2) + if (SHA3_BLOCK == sha3_block_avx2) RESTORE_VECTOR_REGISTERS(); #endif diff --git a/wolfcrypt/src/sha512.c b/wolfcrypt/src/sha512.c index 50a5bb47e..8ee668f6f 100644 --- a/wolfcrypt/src/sha512.c +++ b/wolfcrypt/src/sha512.c @@ -96,7 +96,6 @@ #include #endif - #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) #if defined(__GNUC__) && ((__GNUC__ < 4) || \ (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) @@ -206,6 +205,16 @@ #ifdef WOLFSSL_SHA512 +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ + (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) +#ifdef WC_C_DYNAMIC_FALLBACK + #define SHA512_SETTRANSFORM_ARGS int *sha_method +#else + #define SHA512_SETTRANSFORM_ARGS void +#endif +static void Sha512_SetTransform(SHA512_SETTRANSFORM_ARGS); +#endif + static int InitSha512(wc_Sha512* sha512) { if (sha512 == NULL) @@ -224,6 +233,16 @@ static int InitSha512(wc_Sha512* sha512) sha512->loLen = 0; sha512->hiLen = 0; +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ + (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) +#ifdef WC_C_DYNAMIC_FALLBACK + sha512->sha_method = 0; + Sha512_SetTransform(&sha512->sha_method); +#else + Sha512_SetTransform(); +#endif +#endif + #if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) && \ !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA512) @@ -265,6 +284,16 @@ static int InitSha512_224(wc_Sha512* sha512) sha512->loLen = 0; sha512->hiLen = 0; +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ + (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) +#ifdef WC_C_DYNAMIC_FALLBACK + sha512->sha_method = 0; + Sha512_SetTransform(&sha512->sha_method); +#else + Sha512_SetTransform(); +#endif +#endif + #if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) && \ !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA512) /* HW needs to be carefully initialized, taking into account soft copy. 
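
The Sha512_SetTransform() rework above (mirroring the Sha256 version earlier in the diff) uses a small macro trick so one function body serves both builds: under WC_C_DYNAMIC_FALLBACK the selector takes an int * and records the choice in the per-context sha_method, otherwise it takes void and writes the old file-scope variable. A compilable sketch of that shape, with hypothetical names:

    #include <stdio.h>

    enum method { M_UNSET = 0, M_AVX2, M_C };

    #ifdef DYNAMIC_FALLBACK
        #define SETTRANSFORM_ARGS enum method *method  /* per-context out-param */
    #else
        #define SETTRANSFORM_ARGS void                 /* legacy: global selection */
        static enum method g_method = M_UNSET;
    #endif

    static int cpu_has_avx2(void) { return 1; } /* stands in for cpuid_get_flags() */

    static void set_transform(SETTRANSFORM_ARGS)
    {
    #ifdef DYNAMIC_FALLBACK
        #define METHOD (*method)
    #else
        #define METHOD g_method
    #endif
        if (METHOD != M_UNSET)
            return;                /* already decided for this context/build */
        METHOD = cpu_has_avx2() ? M_AVX2 : M_C;
    #undef METHOD
    }

    int main(void)
    {
    #ifdef DYNAMIC_FALLBACK
        enum method m = M_UNSET;
        set_transform(&m);
        printf("%d\n", (int)m);
    #else
        set_transform();
        printf("%d\n", (int)g_method);
    #endif
        return 0;
    }

The real headers declare the member as a plain int (sha256.h and sha512.h add "int sha_method;"), presumably so the public structs need not see the enum that stays private to the .c files.
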
@@ -308,6 +337,16 @@ static int InitSha512_256(wc_Sha512* sha512) sha512->loLen = 0; sha512->hiLen = 0; +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ + (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) +#ifdef WC_C_DYNAMIC_FALLBACK + sha512->sha_method = 0; + Sha512_SetTransform(&sha512->sha_method); +#else + Sha512_SetTransform(); +#endif +#endif + #if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) && \ !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA512) /* HW needs to be carefully initialized, taking into account soft copy. @@ -425,36 +464,53 @@ static int InitSha512_256(wc_Sha512* sha512) } /* extern "C" */ #endif + static word32 intel_flags = 0; + +#if defined(WC_C_DYNAMIC_FALLBACK) && !defined(WC_NO_INTERNAL_FUNCTION_POINTERS) + #define WC_NO_INTERNAL_FUNCTION_POINTERS +#endif + static int _Transform_Sha512(wc_Sha512 *sha512); - static word32 intel_flags; - static int Transform_Sha512_is_vectorized = 0; #ifdef WC_NO_INTERNAL_FUNCTION_POINTERS - static enum { SHA512_UNSET, SHA512_AVX1, SHA512_AVX2, SHA512_AVX1_RORX, - SHA512_AVX2_RORX, SHA512_C } - sha_method = SHA512_UNSET; + enum sha_methods { SHA512_UNSET = 0, SHA512_AVX1, SHA512_AVX2, + SHA512_AVX1_RORX, SHA512_AVX2_RORX, SHA512_C }; - static void Sha512_SetTransform(void) +#ifndef WC_C_DYNAMIC_FALLBACK + static enum sha_methods sha_method = SHA512_UNSET; +#endif + + static void Sha512_SetTransform(SHA512_SETTRANSFORM_ARGS) { - - if (sha_method != SHA512_UNSET) + #ifdef WC_C_DYNAMIC_FALLBACK + #define SHA_METHOD (*sha_method) + #else + #define SHA_METHOD sha_method + #endif + if (SHA_METHOD != SHA512_UNSET) return; - intel_flags = cpuid_get_flags(); + #ifdef WC_C_DYNAMIC_FALLBACK + if (! CAN_SAVE_VECTOR_REGISTERS()) { + SHA_METHOD = SHA512_C; + return; + } + #endif + + if (intel_flags == 0) + intel_flags = cpuid_get_flags(); #if defined(HAVE_INTEL_AVX2) if (IS_INTEL_AVX2(intel_flags)) { #ifdef HAVE_INTEL_RORX if (IS_INTEL_BMI2(intel_flags)) { - sha_method = SHA512_AVX2_RORX; - Transform_Sha512_is_vectorized = 1; + SHA_METHOD = SHA512_AVX2_RORX; } else #endif { - sha_method = SHA512_AVX2; - Transform_Sha512_is_vectorized = 1; + SHA_METHOD = SHA512_AVX2; } } else @@ -463,30 +519,33 @@ static int InitSha512_256(wc_Sha512* sha512) if (IS_INTEL_AVX1(intel_flags)) { #ifdef HAVE_INTEL_RORX if (IS_INTEL_BMI2(intel_flags)) { - sha_method = SHA512_AVX1_RORX; - Transform_Sha512_is_vectorized = 1; + SHA_METHOD = SHA512_AVX1_RORX; } else #endif { - sha_method = SHA512_AVX1; - Transform_Sha512_is_vectorized = 1; + SHA_METHOD = SHA512_AVX1; } } else #endif { - sha_method = SHA512_C; - Transform_Sha512_is_vectorized = 0; + SHA_METHOD = SHA512_C; } + #undef SHA_METHOD } static WC_INLINE int Transform_Sha512(wc_Sha512 *sha512) { + #ifdef WC_C_DYNAMIC_FALLBACK + #define SHA_METHOD (sha512->sha_method) + #else + #define SHA_METHOD sha_method + #endif int ret; - if (sha_method == SHA512_C) + if (SHA_METHOD == SHA512_C) return _Transform_Sha512(sha512); SAVE_VECTOR_REGISTERS(return _svr_ret;); - switch (sha_method) { + switch (SHA_METHOD) { case SHA512_AVX2: ret = Transform_Sha512_AVX2(sha512); break; @@ -507,13 +566,18 @@ static int InitSha512_256(wc_Sha512* sha512) } RESTORE_VECTOR_REGISTERS(); return ret; + #undef SHA_METHOD } -#define XTRANSFORM(...) 
inline_XTRANSFORM(__VA_ARGS__) static WC_INLINE int Transform_Sha512_Len(wc_Sha512 *sha512, word32 len) { + #ifdef WC_C_DYNAMIC_FALLBACK + #define SHA_METHOD (sha512->sha_method) + #else + #define SHA_METHOD sha_method + #endif int ret; SAVE_VECTOR_REGISTERS(return _svr_ret;); - switch (sha_method) { + switch (SHA_METHOD) { case SHA512_AVX2: ret = Transform_Sha512_AVX2_Len(sha512, len); break; @@ -534,39 +598,40 @@ static int InitSha512_256(wc_Sha512* sha512) } RESTORE_VECTOR_REGISTERS(); return ret; + #undef SHA_METHOD } -#define XTRANSFORM_LEN(...) inline_XTRANSFORM_LEN(__VA_ARGS__) #else /* !WC_NO_INTERNAL_FUNCTION_POINTERS */ static int (*Transform_Sha512_p)(wc_Sha512* sha512) = _Transform_Sha512; static int (*Transform_Sha512_Len_p)(wc_Sha512* sha512, word32 len) = NULL; static int transform_check = 0; + static int Transform_Sha512_is_vectorized = 0; static WC_INLINE int Transform_Sha512(wc_Sha512 *sha512) { int ret; -#ifdef WOLFSSL_LINUXKM + #ifdef WOLFSSL_LINUXKM if (Transform_Sha512_is_vectorized) SAVE_VECTOR_REGISTERS(return _svr_ret;); -#endif + #endif ret = (*Transform_Sha512_p)(sha512); -#ifdef WOLFSSL_LINUXKM + #ifdef WOLFSSL_LINUXKM if (Transform_Sha512_is_vectorized) RESTORE_VECTOR_REGISTERS(); -#endif + #endif return ret; } static WC_INLINE int Transform_Sha512_Len(wc_Sha512 *sha512, word32 len) { int ret; -#ifdef WOLFSSL_LINUXKM + #ifdef WOLFSSL_LINUXKM if (Transform_Sha512_is_vectorized) SAVE_VECTOR_REGISTERS(return _svr_ret;); -#endif + #endif ret = (*Transform_Sha512_Len_p)(sha512, len); -#ifdef WOLFSSL_LINUXKM + #ifdef WOLFSSL_LINUXKM if (Transform_Sha512_is_vectorized) RESTORE_VECTOR_REGISTERS(); -#endif + #endif return ret; } @@ -634,7 +699,7 @@ static int InitSha512_256(wc_Sha512* sha512) static int InitSha512_Family(wc_Sha512* sha512, void* heap, int devId, int (*initfp)(wc_Sha512*)) { - int ret = 0; + int ret = 0; if (sha512 == NULL) { return BAD_FUNC_ARG; @@ -655,10 +720,6 @@ static int InitSha512_Family(wc_Sha512* sha512, void* heap, int devId, if (ret != 0) return ret; -#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ - (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) - Sha512_SetTransform(); -#endif #ifdef WOLFSSL_HASH_KEEP sha512->msg = NULL; sha512->len = 0; @@ -898,7 +959,11 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 le #if defined(LITTLE_ENDIAN_ORDER) #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) + #ifdef WC_C_DYNAMIC_FALLBACK + if (sha512->sha_method == SHA512_C) + #else if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif #endif { #if !defined(WOLFSSL_ESP32_CRYPT) || \ @@ -934,11 +999,14 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 le #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) - #ifdef WC_NO_INTERNAL_FUNCTION_POINTERS + #ifdef WC_C_DYNAMIC_FALLBACK + if (sha512->sha_method != SHA512_C) + #elif defined(WC_NO_INTERNAL_FUNCTION_POINTERS) if (sha_method != SHA512_C) #else if (Transform_Sha512_Len_p != NULL) #endif + { word32 blocksLen = len & ~((word32)WC_SHA512_BLOCK_SIZE-1); @@ -964,7 +1032,11 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 le #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) + #ifdef WC_C_DYNAMIC_FALLBACK + if (sha512->sha_method == SHA512_C) + #else if 
(!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif { ByteReverseWords64(sha512->buffer, sha512->buffer, WC_SHA512_BLOCK_SIZE); @@ -1094,7 +1166,11 @@ static WC_INLINE int Sha512Final(wc_Sha512* sha512) #if defined(LITTLE_ENDIAN_ORDER) #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) + #ifdef WC_C_DYNAMIC_FALLBACK + if (sha512->sha_method == SHA512_C) + #else if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif #endif { @@ -1140,7 +1216,11 @@ static WC_INLINE int Sha512Final(wc_Sha512* sha512) #if defined(LITTLE_ENDIAN_ORDER) #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) + #ifdef WC_C_DYNAMIC_FALLBACK + if (sha512->sha_method == SHA512_C) + #else if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif #endif #if !defined(WOLFSSL_ESP32_CRYPT) || \ defined(NO_WOLFSSL_ESP32_CRYPT_HASH) || \ @@ -1159,7 +1239,11 @@ static WC_INLINE int Sha512Final(wc_Sha512* sha512) #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) + #ifdef WC_C_DYNAMIC_FALLBACK + if (sha512->sha_method != SHA512_C) + #else if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags)) + #endif ByteReverseWords64(&(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]), &(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]), WC_SHA512_BLOCK_SIZE - WC_SHA512_PAD_SIZE); @@ -1362,15 +1446,14 @@ int wc_Sha512Transform(wc_Sha512* sha, const unsigned char* data) return MEMORY_E; #endif -#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ - (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) - Sha512_SetTransform(); -#endif - #if defined(LITTLE_ENDIAN_ORDER) #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) + #ifdef WC_C_DYNAMIC_FALLBACK + if (sha->sha_method == SHA512_C) + #else if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif #endif { ByteReverseWords64((word64*)data, (word64*)data, @@ -1463,6 +1546,16 @@ static int InitSha384(wc_Sha384* sha384) sha384->loLen = 0; sha384->hiLen = 0; +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ + (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) +#ifdef WC_C_DYNAMIC_FALLBACK + sha384->sha_method = 0; + Sha512_SetTransform(&sha384->sha_method); +#else + Sha512_SetTransform(); +#endif +#endif + #if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) && \ !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA384) /* HW needs to be carefully initialized, taking into account soft copy. 
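
Several SHA-512 hunks above switch the little-endian byte-reversal tests from IS_INTEL_AVX1/IS_INTEL_AVX2(intel_flags) to sha512->sha_method == SHA512_C (or != SHA512_C): once fallback is per-context, a context on AVX-capable hardware may still be running the C transform, so it is the chosen transform, not the CPU, that decides whether the buffered words need reversing. A minimal standalone sketch of that decision point (made-up names, simplified data handling):

    #include <stdint.h>
    #include <stdio.h>

    enum method { M_ASM, M_C };

    static uint64_t bswap64(uint64_t v)
    {
        return ((v & 0x00000000000000ffULL) << 56) |
               ((v & 0x000000000000ff00ULL) << 40) |
               ((v & 0x0000000000ff0000ULL) << 24) |
               ((v & 0x00000000ff000000ULL) <<  8) |
               ((v & 0x000000ff00000000ULL) >>  8) |
               ((v & 0x0000ff0000000000ULL) >> 24) |
               ((v & 0x00ff000000000000ULL) >> 40) |
               ((v & 0xff00000000000000ULL) >> 56);
    }

    /* The C transform wants the 16 message words in host order on a
     * little-endian machine; the asm transforms load and shuffle the raw
     * big-endian bytes themselves. */
    static void prepare_block(uint64_t block[16], enum method m)
    {
        if (m == M_C) {          /* keyed off the chosen transform, */
            int i;               /* not off what the CPU supports   */
            for (i = 0; i < 16; i++)
                block[i] = bswap64(block[i]);
        }
    }

    int main(void)
    {
        uint64_t block[16] = { 0x0102030405060708ULL };
        prepare_block(block, M_C);
        printf("%016llx\n", (unsigned long long)block[0]);
        return 0;
    }
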
@@ -1605,11 +1698,6 @@ int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId) return ret; } -#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ - (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) - Sha512_SetTransform(); -#endif - #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384) ret = wolfAsync_DevCtxInit(&sha384->asyncDev, WOLFSSL_ASYNC_MARKER_SHA384, sha384->heap, devId); diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index 88e1f94fc..10a3cef36 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -9976,7 +9976,7 @@ static wc_test_ret_t aes_xts_128_test(void) ERROR_OUT(WC_TEST_RET_ENC_NC, out); #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ - defined(WC_AES_C_DYNAMIC_FALLBACK) + defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); ret = wc_AesXtsEncrypt(aes, buf, p2, sizeof(p2), i2, sizeof(i2)); #if defined(WOLFSSL_ASYNC_CRYPT) @@ -10004,7 +10004,7 @@ static wc_test_ret_t aes_xts_128_test(void) ERROR_OUT(WC_TEST_RET_ENC_NC, out); #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ - defined(WC_AES_C_DYNAMIC_FALLBACK) + defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); ret = wc_AesXtsEncrypt(aes, buf, p1, sizeof(p1), i1, sizeof(i1)); #if defined(WOLFSSL_ASYNC_CRYPT) @@ -10029,7 +10029,7 @@ static wc_test_ret_t aes_xts_128_test(void) ERROR_OUT(WC_TEST_RET_ENC_NC, out); #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ - defined(WC_AES_C_DYNAMIC_FALLBACK) + defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); XMEMSET(cipher, 0, sizeof(cipher)); ret = wc_AesXtsEncrypt(aes, cipher, pp, sizeof(pp), i1, sizeof(i1)); @@ -10062,7 +10062,7 @@ static wc_test_ret_t aes_xts_128_test(void) ERROR_OUT(WC_TEST_RET_ENC_NC, out); #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ - defined(WC_AES_C_DYNAMIC_FALLBACK) + defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); XMEMSET(buf, 0, sizeof(buf)); ret = wc_AesXtsDecrypt(aes, buf, cipher, sizeof(pp), i1, sizeof(i1)); @@ -10096,7 +10096,7 @@ static wc_test_ret_t aes_xts_128_test(void) ERROR_OUT(WC_TEST_RET_ENC_NC, out); #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ - defined(WC_AES_C_DYNAMIC_FALLBACK) + defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); XMEMSET(buf, 0, sizeof(buf)); ret = wc_AesXtsDecrypt(aes, buf, c1, sizeof(c1), i1, sizeof(i1)); @@ -10987,7 +10987,7 @@ static wc_test_ret_t aesecb_test(void) if (XMEMCMP(cipher, niCipher, AES_BLOCK_SIZE) != 0) ERROR_OUT(WC_TEST_RET_ENC_NC, out); -#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_AES_C_DYNAMIC_FALLBACK) +#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_C_DYNAMIC_FALLBACK) XMEMSET(cipher, 0, AES_BLOCK_SIZE); WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); ret = wc_AesSetKey(enc, niKey, sizeof(niKey), cipher, AES_ENCRYPTION); @@ -11029,7 +11029,7 @@ static wc_test_ret_t aesecb_test(void) if (XMEMCMP(plain, niPlain, AES_BLOCK_SIZE) != 0) ERROR_OUT(WC_TEST_RET_ENC_NC, out); -#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_AES_C_DYNAMIC_FALLBACK) +#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_C_DYNAMIC_FALLBACK) XMEMSET(plain, 0, AES_BLOCK_SIZE); WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); ret = wc_AesSetKey(dec, niKey, sizeof(niKey), plain, AES_DECRYPTION); @@ -11694,7 +11694,7 @@ static wc_test_ret_t aesctr_test(Aes* enc, Aes* dec, byte* cipher, byte* plain) #endif } -#if 
defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_AES_C_DYNAMIC_FALLBACK) +#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_C_DYNAMIC_FALLBACK) for (i = 0; i < AES_CTR_TEST_LEN; i++) { if (testVec[i].key != NULL) { ret = wc_AesSetKeyDirect(enc, testVec[i].key, testVec[i].keySz, @@ -11771,7 +11771,7 @@ static wc_test_ret_t aesctr_test(Aes* enc, Aes* dec, byte* cipher, byte* plain) #endif } -#endif /* DEBUG_VECTOR_REGISTER_ACCESS && WC_AES_C_DYNAMIC_FALLBACK */ +#endif /* DEBUG_VECTOR_REGISTER_ACCESS && WC_C_DYNAMIC_FALLBACK */ out: @@ -12036,7 +12036,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t aes_test(void) break; } -#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_AES_C_DYNAMIC_FALLBACK) +#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_C_DYNAMIC_FALLBACK) /* Iterate from one AES_BLOCK_SIZE of bigMsg through the whole * message by AES_BLOCK_SIZE for each size of AES key. */ WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); @@ -12085,7 +12085,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t aes_test(void) break; } WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(0); -#endif /* DEBUG_VECTOR_REGISTER_ACCESS && WC_AES_C_DYNAMIC_FALLBACK */ +#endif /* DEBUG_VECTOR_REGISTER_ACCESS && WC_C_DYNAMIC_FALLBACK */ #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) XFREE(bigCipher, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); @@ -12587,7 +12587,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t aes256_test(void) ERROR_OUT(WC_TEST_RET_ENC_NC, out); #endif -#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_AES_C_DYNAMIC_FALLBACK) +#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_C_DYNAMIC_FALLBACK) ret = wc_AesSetKey(enc, key, keySz, iv, AES_ENCRYPTION); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); @@ -12662,7 +12662,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t aes256_test(void) ERROR_OUT(WC_TEST_RET_ENC_NC, out); #endif -#endif /* DEBUG_VECTOR_REGISTER_ACCESS && WC_AES_C_DYNAMIC_FALLBACK */ +#endif /* DEBUG_VECTOR_REGISTER_ACCESS && WC_C_DYNAMIC_FALLBACK */ out: @@ -12754,7 +12754,7 @@ static wc_test_ret_t aesgcm_default_test_helper(byte* key, int keySz, byte* iv, if (XMEMCMP(tag, resultT, tagSz)) ERROR_OUT(WC_TEST_RET_ENC_NC, out); -#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_AES_C_DYNAMIC_FALLBACK) +#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); ret = wc_AesGcmEncrypt(enc, resultC, plain, plainSz, iv, ivSz, resultT, tagSz, aad, aadSz); @@ -12789,7 +12789,7 @@ static wc_test_ret_t aesgcm_default_test_helper(byte* key, int keySz, byte* iv, ERROR_OUT(WC_TEST_RET_ENC_NC, out); } -#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_AES_C_DYNAMIC_FALLBACK) +#if defined(DEBUG_VECTOR_REGISTER_ACCESS) && defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); ret = wc_AesGcmDecrypt(dec, resultP, resultC, cipherSz, iv, ivSz, resultT, tagSz, aad, aadSz); @@ -22661,9 +22661,19 @@ static wc_test_ret_t openssl_aes_test(void) WOLFSSL_TEST_SUBROUTINE wc_test_ret_t openssl_test(void) { wc_test_ret_t ret; - EVP_MD_CTX md_ctx; +#ifdef WOLFSSL_SMALL_STACK + EVP_MD_CTX *md_ctx = (EVP_MD_CTX *)XMALLOC(sizeof(EVP_MD_CTX), NULL, DYNAMIC_TYPE_OPENSSL); +#else + EVP_MD_CTX md_ctx[1]; +#endif testVector a, b, c, d, e, f; byte hash[WC_SHA256_DIGEST_SIZE*2]; /* max size */ + +#ifdef WOLFSSL_SMALL_STACK + if (md_ctx == NULL) + return WC_TEST_RET_ENC_EC(MEMORY_E); +#endif + WOLFSSL_ENTER("openssl_test"); a.inLen = 0; @@ -22697,15 +22707,15 @@ WOLFSSL_TEST_SUBROUTINE 
wc_test_ret_t openssl_test(void) a.inLen = XSTRLEN(a.input); a.outLen = WC_MD5_DIGEST_SIZE; - EVP_MD_CTX_init(&md_ctx); - ret = EVP_DigestInit(&md_ctx, EVP_md5()); + EVP_MD_CTX_init(md_ctx); + ret = EVP_DigestInit(md_ctx, EVP_md5()); if (ret == WOLFSSL_SUCCESS) { - ret = EVP_DigestUpdate(&md_ctx, a.input, (unsigned long)a.inLen); + ret = EVP_DigestUpdate(md_ctx, a.input, (unsigned long)a.inLen); } if (ret == WOLFSSL_SUCCESS) { - ret = EVP_DigestFinal(&md_ctx, hash, 0); + ret = EVP_DigestFinal(md_ctx, hash, 0); } - EVP_MD_CTX_cleanup(&md_ctx); + EVP_MD_CTX_cleanup(md_ctx); if (ret != WOLFSSL_SUCCESS) return WC_TEST_RET_ENC_NC; if (XMEMCMP(hash, a.output, WC_MD5_DIGEST_SIZE) != 0) @@ -22721,14 +22731,14 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t openssl_test(void) b.inLen = XSTRLEN(b.input); b.outLen = WC_SHA_DIGEST_SIZE; - EVP_MD_CTX_init(&md_ctx); - ret = EVP_DigestInit(&md_ctx, EVP_sha1()); + EVP_MD_CTX_init(md_ctx); + ret = EVP_DigestInit(md_ctx, EVP_sha1()); if (ret == WOLFSSL_SUCCESS) { - ret = EVP_DigestUpdate(&md_ctx, b.input, (unsigned long)b.inLen); + ret = EVP_DigestUpdate(md_ctx, b.input, (unsigned long)b.inLen); if (ret == WOLFSSL_SUCCESS) - ret = EVP_DigestFinal(&md_ctx, hash, 0); + ret = EVP_DigestFinal(md_ctx, hash, 0); } - EVP_MD_CTX_cleanup(&md_ctx); + EVP_MD_CTX_cleanup(md_ctx); if (ret != WOLFSSL_SUCCESS) return WC_TEST_RET_ENC_NC; if (XMEMCMP(hash, b.output, b.outLen) != 0) @@ -22743,14 +22753,14 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t openssl_test(void) e.inLen = XSTRLEN(e.input); e.outLen = WC_SHA224_DIGEST_SIZE; - EVP_MD_CTX_init(&md_ctx); - ret = EVP_DigestInit(&md_ctx, EVP_sha224()); + EVP_MD_CTX_init(md_ctx); + ret = EVP_DigestInit(md_ctx, EVP_sha224()); if (ret == WOLFSSL_SUCCESS) { - ret = EVP_DigestUpdate(&md_ctx, e.input, (unsigned long)e.inLen); + ret = EVP_DigestUpdate(md_ctx, e.input, (unsigned long)e.inLen); if (ret == WOLFSSL_SUCCESS) - ret = EVP_DigestFinal(&md_ctx, hash, 0); + ret = EVP_DigestFinal(md_ctx, hash, 0); } - EVP_MD_CTX_cleanup(&md_ctx); + EVP_MD_CTX_cleanup(md_ctx); if (ret != WOLFSSL_SUCCESS || XMEMCMP(hash, e.output, e.outLen) != 0) { return WC_TEST_RET_ENC_NC; } @@ -22764,14 +22774,14 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t openssl_test(void) d.inLen = XSTRLEN(d.input); d.outLen = WC_SHA256_DIGEST_SIZE; - EVP_MD_CTX_init(&md_ctx); - ret = EVP_DigestInit(&md_ctx, EVP_sha256()); + EVP_MD_CTX_init(md_ctx); + ret = EVP_DigestInit(md_ctx, EVP_sha256()); if (ret == WOLFSSL_SUCCESS) { - ret = EVP_DigestUpdate(&md_ctx, d.input, (unsigned long)d.inLen); + ret = EVP_DigestUpdate(md_ctx, d.input, (unsigned long)d.inLen); if (ret == WOLFSSL_SUCCESS) - ret = EVP_DigestFinal(&md_ctx, hash, 0); + ret = EVP_DigestFinal(md_ctx, hash, 0); } - EVP_MD_CTX_cleanup(&md_ctx); + EVP_MD_CTX_cleanup(md_ctx); if (ret != WOLFSSL_SUCCESS || XMEMCMP(hash, d.output, d.outLen) != 0) { return WC_TEST_RET_ENC_NC; } @@ -22787,14 +22797,14 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t openssl_test(void) e.inLen = XSTRLEN(e.input); e.outLen = WC_SHA384_DIGEST_SIZE; - EVP_MD_CTX_init(&md_ctx); - ret = EVP_DigestInit(&md_ctx, EVP_sha384()); + EVP_MD_CTX_init(md_ctx); + ret = EVP_DigestInit(md_ctx, EVP_sha384()); if (ret == WOLFSSL_SUCCESS) { - ret = EVP_DigestUpdate(&md_ctx, e.input, (unsigned long)e.inLen); + ret = EVP_DigestUpdate(md_ctx, e.input, (unsigned long)e.inLen); if (ret == WOLFSSL_SUCCESS) - ret = EVP_DigestFinal(&md_ctx, hash, 0); + ret = EVP_DigestFinal(md_ctx, hash, 0); } - EVP_MD_CTX_cleanup(&md_ctx); + EVP_MD_CTX_cleanup(md_ctx); if (ret != WOLFSSL_SUCCESS || 
XMEMCMP(hash, e.output, e.outLen) != 0) { return WC_TEST_RET_ENC_NC; } @@ -22811,14 +22821,14 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t openssl_test(void) f.inLen = XSTRLEN(f.input); f.outLen = WC_SHA512_DIGEST_SIZE; - EVP_MD_CTX_init(&md_ctx); - ret = EVP_DigestInit(&md_ctx, EVP_sha512()); + EVP_MD_CTX_init(md_ctx); + ret = EVP_DigestInit(md_ctx, EVP_sha512()); if (ret == WOLFSSL_SUCCESS) { - ret = EVP_DigestUpdate(&md_ctx, f.input, (unsigned long)f.inLen); + ret = EVP_DigestUpdate(md_ctx, f.input, (unsigned long)f.inLen); if (ret == WOLFSSL_SUCCESS) - ret = EVP_DigestFinal(&md_ctx, hash, 0); + ret = EVP_DigestFinal(md_ctx, hash, 0); } - EVP_MD_CTX_cleanup(&md_ctx); + EVP_MD_CTX_cleanup(md_ctx); if (ret != WOLFSSL_SUCCESS || XMEMCMP(hash, f.output, f.outLen) != 0) { return WC_TEST_RET_ENC_NC; } @@ -22833,14 +22843,14 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t openssl_test(void) f.inLen = XSTRLEN(f.input); f.outLen = WC_SHA512_224_DIGEST_SIZE; - EVP_MD_CTX_init(&md_ctx); - ret = EVP_DigestInit(&md_ctx, EVP_sha512_224()); + EVP_MD_CTX_init(md_ctx); + ret = EVP_DigestInit(md_ctx, EVP_sha512_224()); if (ret == WOLFSSL_SUCCESS) { - ret = EVP_DigestUpdate(&md_ctx, f.input, (unsigned long)f.inLen); + ret = EVP_DigestUpdate(md_ctx, f.input, (unsigned long)f.inLen); if (ret == WOLFSSL_SUCCESS) - ret = EVP_DigestFinal(&md_ctx, hash, 0); + ret = EVP_DigestFinal(md_ctx, hash, 0); } - EVP_MD_CTX_cleanup(&md_ctx); + EVP_MD_CTX_cleanup(md_ctx); if (ret != WOLFSSL_SUCCESS || XMEMCMP(hash, f.output, f.outLen) != 0) { return WC_TEST_RET_ENC_NC; } @@ -22856,14 +22866,14 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t openssl_test(void) f.inLen = XSTRLEN(f.input); f.outLen = WC_SHA512_256_DIGEST_SIZE; - EVP_MD_CTX_init(&md_ctx); - ret = EVP_DigestInit(&md_ctx, EVP_sha512_256()); + EVP_MD_CTX_init(md_ctx); + ret = EVP_DigestInit(md_ctx, EVP_sha512_256()); if (ret == WOLFSSL_SUCCESS) { - ret = EVP_DigestUpdate(&md_ctx, f.input, (unsigned long)f.inLen); + ret = EVP_DigestUpdate(md_ctx, f.input, (unsigned long)f.inLen); if (ret == WOLFSSL_SUCCESS) - ret = EVP_DigestFinal(&md_ctx, hash, 0); + ret = EVP_DigestFinal(md_ctx, hash, 0); } - EVP_MD_CTX_cleanup(&md_ctx); + EVP_MD_CTX_cleanup(md_ctx); if (ret != WOLFSSL_SUCCESS || XMEMCMP(hash, f.output, f.outLen) != 0) { return WC_TEST_RET_ENC_NC; } @@ -22879,14 +22889,14 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t openssl_test(void) e.inLen = XSTRLEN(e.input); e.outLen = WC_SHA3_224_DIGEST_SIZE; - EVP_MD_CTX_init(&md_ctx); - ret = EVP_DigestInit(&md_ctx, EVP_sha3_224()); + EVP_MD_CTX_init(md_ctx); + ret = EVP_DigestInit(md_ctx, EVP_sha3_224()); if (ret == WOLFSSL_SUCCESS) { - ret = EVP_DigestUpdate(&md_ctx, e.input, (unsigned long)e.inLen); + ret = EVP_DigestUpdate(md_ctx, e.input, (unsigned long)e.inLen); if (ret == WOLFSSL_SUCCESS) - ret = EVP_DigestFinal(&md_ctx, hash, 0); + ret = EVP_DigestFinal(md_ctx, hash, 0); } - EVP_MD_CTX_cleanup(&md_ctx); + EVP_MD_CTX_cleanup(md_ctx); if (ret != WOLFSSL_SUCCESS || XMEMCMP(hash, e.output, e.outLen) != 0) { return WC_TEST_RET_ENC_NC; } @@ -22901,14 +22911,14 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t openssl_test(void) d.inLen = XSTRLEN(d.input); d.outLen = WC_SHA3_256_DIGEST_SIZE; - EVP_MD_CTX_init(&md_ctx); - ret = EVP_DigestInit(&md_ctx, EVP_sha3_256()); + EVP_MD_CTX_init(md_ctx); + ret = EVP_DigestInit(md_ctx, EVP_sha3_256()); if (ret == WOLFSSL_SUCCESS) { - ret = EVP_DigestUpdate(&md_ctx, d.input, (unsigned long)d.inLen); + ret = EVP_DigestUpdate(md_ctx, d.input, (unsigned long)d.inLen); if (ret == WOLFSSL_SUCCESS) - ret = 
EVP_DigestFinal(&md_ctx, hash, 0); + ret = EVP_DigestFinal(md_ctx, hash, 0); } - EVP_MD_CTX_cleanup(&md_ctx); + EVP_MD_CTX_cleanup(md_ctx); if (ret != WOLFSSL_SUCCESS || XMEMCMP(hash, d.output, d.outLen) != 0) { return WC_TEST_RET_ENC_NC; } @@ -22923,14 +22933,14 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t openssl_test(void) e.inLen = XSTRLEN(e.input); e.outLen = WC_SHA3_384_DIGEST_SIZE; - EVP_MD_CTX_init(&md_ctx); - ret = EVP_DigestInit(&md_ctx, EVP_sha3_384()); + EVP_MD_CTX_init(md_ctx); + ret = EVP_DigestInit(md_ctx, EVP_sha3_384()); if (ret == WOLFSSL_SUCCESS) { - ret = EVP_DigestUpdate(&md_ctx, e.input, (unsigned long)e.inLen); + ret = EVP_DigestUpdate(md_ctx, e.input, (unsigned long)e.inLen); if (ret == WOLFSSL_SUCCESS) - ret = EVP_DigestFinal(&md_ctx, hash, 0); + ret = EVP_DigestFinal(md_ctx, hash, 0); } - EVP_MD_CTX_cleanup(&md_ctx); + EVP_MD_CTX_cleanup(md_ctx); if (ret != WOLFSSL_SUCCESS || XMEMCMP(hash, e.output, e.outLen) != 0) { return WC_TEST_RET_ENC_NC; } @@ -22946,14 +22956,14 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t openssl_test(void) f.inLen = XSTRLEN(f.input); f.outLen = WC_SHA3_512_DIGEST_SIZE; - EVP_MD_CTX_init(&md_ctx); - ret = EVP_DigestInit(&md_ctx, EVP_sha3_512()); + EVP_MD_CTX_init(md_ctx); + ret = EVP_DigestInit(md_ctx, EVP_sha3_512()); if (ret == WOLFSSL_SUCCESS) { - ret = EVP_DigestUpdate(&md_ctx, f.input, (unsigned long)f.inLen); + ret = EVP_DigestUpdate(md_ctx, f.input, (unsigned long)f.inLen); if (ret == WOLFSSL_SUCCESS) - ret = EVP_DigestFinal(&md_ctx, hash, 0); + ret = EVP_DigestFinal(md_ctx, hash, 0); } - EVP_MD_CTX_cleanup(&md_ctx); + EVP_MD_CTX_cleanup(md_ctx); if (ret != WOLFSSL_SUCCESS || XMEMCMP(hash, f.output, f.outLen) != 0) { return WC_TEST_RET_ENC_NC; @@ -22961,6 +22971,11 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t openssl_test(void) #endif /* WOLFSSL_NOSHA3_512 */ #endif /* WOLFSSL_SHA3 */ +#ifdef WOLFSSL_SMALL_STACK + XFREE(md_ctx, NULL, DYNAMIC_TYPE_OPENSSL); + md_ctx = NULL; +#endif + #ifndef WC_NO_RNG if (RAND_bytes(hash, sizeof(hash)) != WOLFSSL_SUCCESS) return WC_TEST_RET_ENC_NC; diff --git a/wolfssl/openssl/sha3.h b/wolfssl/openssl/sha3.h index 1b0d63bc5..9f6b0f997 100644 --- a/wolfssl/openssl/sha3.h +++ b/wolfssl/openssl/sha3.h @@ -27,6 +27,7 @@ #include #include +#include #ifdef WOLFSSL_PREFIX #include "prefix_sha.h" @@ -41,7 +42,11 @@ * to Sha3 is expected to also be 16 byte aligned addresses. 
*/ struct WOLFSSL_SHA3_CTX { /* big enough to hold wolfcrypt Sha3, but check on init */ +#ifdef WOLFSSL_SHA3 + ALIGN16 void* holder[sizeof(struct wc_Sha3)]; +#else ALIGN16 void* holder[(424 + WC_ASYNC_DEV_SIZE) / sizeof(void*)]; +#endif }; #ifndef WOLFSSL_NOSHA3_224 diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h index 00f7ee3b0..81d135b0a 100644 --- a/wolfssl/wolfcrypt/aes.h +++ b/wolfssl/wolfcrypt/aes.h @@ -261,7 +261,7 @@ struct Aes { ALIGN16 bs_word bs_key[15 * AES_BLOCK_SIZE * BS_WORD_SIZE]; #endif word32 rounds; -#ifdef WC_AES_C_DYNAMIC_FALLBACK +#ifdef WC_C_DYNAMIC_FALLBACK word32 key_C_fallback[60]; #endif int keylen; diff --git a/wolfssl/wolfcrypt/memory.h b/wolfssl/wolfcrypt/memory.h index 9a1d7b06d..9702dab7a 100644 --- a/wolfssl/wolfcrypt/memory.h +++ b/wolfssl/wolfcrypt/memory.h @@ -272,6 +272,9 @@ WOLFSSL_LOCAL int wc_debug_CipherLifecycleFree(void **CipherLifecycleTag, #ifndef WC_DEBUG_VECTOR_REGISTERS_FUZZING_SEED #define WC_DEBUG_VECTOR_REGISTERS_FUZZING_SEED 0 #endif + #ifndef CAN_SAVE_VECTOR_REGISTERS + #define CAN_SAVE_VECTOR_REGISTERS() (SAVE_VECTOR_REGISTERS2_fuzzer() == 0) + #endif #endif #ifdef DEBUG_VECTOR_REGISTER_ACCESS diff --git a/wolfssl/wolfcrypt/sha256.h b/wolfssl/wolfcrypt/sha256.h index e08c45db4..d8239c2e3 100644 --- a/wolfssl/wolfcrypt/sha256.h +++ b/wolfssl/wolfcrypt/sha256.h @@ -186,6 +186,11 @@ struct wc_Sha256 { word32 loLen; /* length in bytes */ word32 hiLen; /* length in bytes */ void* heap; + +#ifdef WC_C_DYNAMIC_FALLBACK + int sha_method; +#endif + #endif #ifdef WOLFSSL_PIC32MZ_HASH hashUpdCache cache; /* cache for updates */ diff --git a/wolfssl/wolfcrypt/sha3.h b/wolfssl/wolfcrypt/sha3.h index b24444f89..149c714c4 100644 --- a/wolfssl/wolfcrypt/sha3.h +++ b/wolfssl/wolfcrypt/sha3.h @@ -124,6 +124,12 @@ struct wc_Sha3 { void* heap; +#ifdef WC_C_DYNAMIC_FALLBACK + void (*sha3_block)(word64 *s); + void (*sha3_block_n)(word64 *s, const byte* data, word32 n, + word64 c); +#endif + #ifdef WOLFSSL_ASYNC_CRYPT WC_ASYNC_DEV asyncDev; #endif /* WOLFSSL_ASYNC_CRYPT */ diff --git a/wolfssl/wolfcrypt/sha512.h b/wolfssl/wolfcrypt/sha512.h index 7db8c93f2..96bbc28e4 100644 --- a/wolfssl/wolfcrypt/sha512.h +++ b/wolfssl/wolfcrypt/sha512.h @@ -151,6 +151,9 @@ struct wc_Sha512 { #ifdef USE_INTEL_SPEEDUP const byte* data; #endif +#ifdef WC_C_DYNAMIC_FALLBACK + int sha_method; +#endif #ifdef WOLFSSL_ASYNC_CRYPT WC_ASYNC_DEV asyncDev; #endif /* WOLFSSL_ASYNC_CRYPT */ diff --git a/wolfssl/wolfcrypt/types.h b/wolfssl/wolfcrypt/types.h index f5a279feb..89f01e259 100644 --- a/wolfssl/wolfcrypt/types.h +++ b/wolfssl/wolfcrypt/types.h @@ -1636,6 +1636,9 @@ typedef struct w64wrapper { #ifndef SAVE_VECTOR_REGISTERS2 #define SAVE_VECTOR_REGISTERS2() 0 #endif + #ifndef CAN_SAVE_VECTOR_REGISTERS + #define CAN_SAVE_VECTOR_REGISTERS() 1 + #endif #ifndef WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL #define WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(x) WC_DO_NOTHING #endif
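
Taken together, the header changes give CAN_SAVE_VECTOR_REGISTERS() three layers: a default of 1 in types.h, a fuzzing-aware definition in wolfcrypt/memory.h under DEBUG_VECTOR_REGISTER_ACCESS_FUZZING, and the Linux-kernel x86 definition in linuxkm_wc_port.h that calls can_save_vector_registers_x86() (and, when fuzzing, also requires SAVE_VECTOR_REGISTERS2_fuzzer() == 0). Callers such as Sha256_SetTransform(), Sha512_SetTransform(), and InitSha3() probe it once at init and pin the context to the C method when it returns 0. A rough standalone model of the layering (stand-in names, not the actual wolfSSL headers):

    #include <stdio.h>
    #include <stdlib.h>

    /* Stand-in for the port-layer probe (irq_fpu_usable() etc. in the
     * kernel-module build). */
    static int probe_ok(void) { return 1; }

    #ifdef FUZZ_VECTOR_REGISTER_ACCESS
        /* Fuzzing build: even when the probe succeeds, randomly report "no"
         * so the C fallback paths get exercised. */
        static int fuzz_fail_sometimes(void) { return rand() % 4 == 0; }
        #define CAN_SAVE_VECTOR_REGISTERS() (probe_ok() && !fuzz_fail_sometimes())
    #else
        #define CAN_SAVE_VECTOR_REGISTERS() probe_ok()
    #endif

    int main(void)
    {
        int i, c_path = 0;
        for (i = 0; i < 100; i++) {
            if (!CAN_SAVE_VECTOR_REGISTERS())
                c_path++;        /* a real caller would pin this context to C */
        }
        printf("fell back to C %d/100 times\n", c_path);
        return 0;
    }

Probing once at init is what lets the later update/final paths keep using SAVE_VECTOR_REGISTERS() unconditionally on the SIMD methods: a context initialized where vector state cannot be saved never selects one of those methods in the first place.
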