diff --git a/wolfcrypt/src/sha512.c b/wolfcrypt/src/sha512.c
index 50a5bb47e..8f11eab85 100644
--- a/wolfcrypt/src/sha512.c
+++ b/wolfcrypt/src/sha512.c
@@ -96,7 +96,6 @@
     #include
 #endif
-
 #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP)
     #if defined(__GNUC__) && ((__GNUC__ < 4) || \
                               (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
@@ -206,6 +205,17 @@
 #ifdef WOLFSSL_SHA512
 
+#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
+    (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
+static void Sha512_SetTransform(
+#ifdef WC_C_DYNAMIC_FALLBACK
+    int *sha_method
+#else
+    void
+#endif
+    );
+#endif
+
 static int InitSha512(wc_Sha512* sha512)
 {
     if (sha512 == NULL)
@@ -224,6 +234,16 @@ static int InitSha512(wc_Sha512* sha512)
     sha512->loLen = 0;
     sha512->hiLen = 0;
 
+#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
+    (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
+#ifdef WC_C_DYNAMIC_FALLBACK
+    sha512->sha_method = 0;
+    Sha512_SetTransform(&sha512->sha_method);
+#else
+    Sha512_SetTransform();
+#endif
+#endif
+
 #if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) && \
     !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA512)
@@ -265,6 +285,16 @@ static int InitSha512_224(wc_Sha512* sha512)
     sha512->loLen = 0;
     sha512->hiLen = 0;
 
+#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
+    (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
+#ifdef WC_C_DYNAMIC_FALLBACK
+    sha512->sha_method = 0;
+    Sha512_SetTransform(&sha512->sha_method);
+#else
+    Sha512_SetTransform();
+#endif
+#endif
+
 #if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) && \
     !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA512)
     /* HW needs to be carefully initialized, taking into account soft copy.
@@ -308,6 +338,16 @@ static int InitSha512_256(wc_Sha512* sha512)
     sha512->loLen = 0;
     sha512->hiLen = 0;
 
+#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
+    (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
+#ifdef WC_C_DYNAMIC_FALLBACK
+    sha512->sha_method = 0;
+    Sha512_SetTransform(&sha512->sha_method);
+#else
+    Sha512_SetTransform();
+#endif
+#endif
+
 #if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) && \
     !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA512)
     /* HW needs to be carefully initialized, taking into account soft copy.
@@ -425,36 +465,58 @@ static int InitSha512_256(wc_Sha512* sha512)
     } /* extern "C" */
 #endif
 
+#if defined(WC_C_DYNAMIC_FALLBACK) && !defined(WC_NO_INTERNAL_FUNCTION_POINTERS)
+    #define WC_NO_INTERNAL_FUNCTION_POINTERS
+#endif
+
     static int _Transform_Sha512(wc_Sha512 *sha512);
-    static word32 intel_flags;
-    static int Transform_Sha512_is_vectorized = 0;
 
 #ifdef WC_NO_INTERNAL_FUNCTION_POINTERS
-    static enum { SHA512_UNSET, SHA512_AVX1, SHA512_AVX2, SHA512_AVX1_RORX,
-                  SHA512_AVX2_RORX, SHA512_C }
-        sha_method = SHA512_UNSET;
+    enum sha_methods { SHA512_UNSET = 0, SHA512_AVX1, SHA512_AVX2,
+                       SHA512_AVX1_RORX, SHA512_AVX2_RORX, SHA512_C };
 
-    static void Sha512_SetTransform(void)
+#ifndef WC_C_DYNAMIC_FALLBACK
+    static enum sha_methods sha_method = SHA512_UNSET;
+    static word32 intel_flags;
+#endif
+
+    static void Sha512_SetTransform(
+#ifdef WC_C_DYNAMIC_FALLBACK
+        int *sha_method
+#else
+        void
+#endif
+        )
     {
-
-        if (sha_method != SHA512_UNSET)
+#ifdef WC_C_DYNAMIC_FALLBACK
+    #define SHA_METHOD (*sha_method)
+        word32 intel_flags;
+#else
+    #define SHA_METHOD sha_method
+#endif
+        if (SHA_METHOD != SHA512_UNSET)
             return;
 
+#ifdef WC_C_DYNAMIC_FALLBACK
+        if (! CAN_SAVE_VECTOR_REGISTERS()) {
+            SHA_METHOD = SHA512_C;
+            return;
+        }
+#endif
+
         intel_flags = cpuid_get_flags();
 
     #if defined(HAVE_INTEL_AVX2)
         if (IS_INTEL_AVX2(intel_flags)) {
         #ifdef HAVE_INTEL_RORX
             if (IS_INTEL_BMI2(intel_flags)) {
-                sha_method = SHA512_AVX2_RORX;
-                Transform_Sha512_is_vectorized = 1;
+                SHA_METHOD = SHA512_AVX2_RORX;
             }
             else
         #endif
             {
-                sha_method = SHA512_AVX2;
-                Transform_Sha512_is_vectorized = 1;
+                SHA_METHOD = SHA512_AVX2;
             }
         }
         else
@@ -463,30 +525,33 @@ static int InitSha512_256(wc_Sha512* sha512)
    #if defined(HAVE_INTEL_AVX1)
        if (IS_INTEL_AVX1(intel_flags)) {
        #ifdef HAVE_INTEL_RORX
            if (IS_INTEL_BMI2(intel_flags)) {
-                sha_method = SHA512_AVX1_RORX;
-                Transform_Sha512_is_vectorized = 1;
+                SHA_METHOD = SHA512_AVX1_RORX;
            }
            else
        #endif
            {
-                sha_method = SHA512_AVX1;
-                Transform_Sha512_is_vectorized = 1;
+                SHA_METHOD = SHA512_AVX1;
            }
        }
        else
    #endif
        {
-            sha_method = SHA512_C;
-            Transform_Sha512_is_vectorized = 0;
+            SHA_METHOD = SHA512_C;
        }
+#undef SHA_METHOD
    }
 
    static WC_INLINE int Transform_Sha512(wc_Sha512 *sha512)
    {
+#ifdef WC_C_DYNAMIC_FALLBACK
+    #define SHA_METHOD (sha512->sha_method)
+#else
+    #define SHA_METHOD sha_method
+#endif
        int ret;
-        if (sha_method == SHA512_C)
+        if (SHA_METHOD == SHA512_C)
            return _Transform_Sha512(sha512);
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
-        switch (sha_method) {
+        switch (SHA_METHOD) {
        case SHA512_AVX2:
            ret = Transform_Sha512_AVX2(sha512);
            break;
@@ -507,13 +572,19 @@ static int InitSha512_256(wc_Sha512* sha512)
        }
        RESTORE_VECTOR_REGISTERS();
        return ret;
+#undef SHA_METHOD
    }
 
#define XTRANSFORM(...) inline_XTRANSFORM(__VA_ARGS__)
 
    static WC_INLINE int Transform_Sha512_Len(wc_Sha512 *sha512, word32 len)
    {
+#ifdef WC_C_DYNAMIC_FALLBACK
+    #define SHA_METHOD (sha512->sha_method)
+#else
+    #define SHA_METHOD sha_method
+#endif
        int ret;
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
-        switch (sha_method) {
+        switch (SHA_METHOD) {
        case SHA512_AVX2:
            ret = Transform_Sha512_AVX2_Len(sha512, len);
            break;
@@ -534,6 +605,7 @@ static int InitSha512_256(wc_Sha512* sha512)
        }
        RESTORE_VECTOR_REGISTERS();
        return ret;
+#undef SHA_METHOD
    }
 
#define XTRANSFORM_LEN(...) inline_XTRANSFORM_LEN(__VA_ARGS__)
@@ -542,6 +614,7 @@ static int InitSha512_256(wc_Sha512* sha512)
    static int (*Transform_Sha512_p)(wc_Sha512* sha512) = _Transform_Sha512;
    static int (*Transform_Sha512_Len_p)(wc_Sha512* sha512, word32 len) = NULL;
    static int transform_check = 0;
+    static int Transform_Sha512_is_vectorized = 0;
 
    static WC_INLINE int Transform_Sha512(wc_Sha512 *sha512) {
        int ret;
@@ -634,7 +707,7 @@ static int InitSha512_256(wc_Sha512* sha512)
 static int InitSha512_Family(wc_Sha512* sha512, void* heap, int devId,
                              int (*initfp)(wc_Sha512*))
 {
-    int ret = 0;
+    int ret = 0;
     if (sha512 == NULL) {
         return BAD_FUNC_ARG;
     }
@@ -655,10 +728,6 @@ static int InitSha512_Family(wc_Sha512* sha512, void* heap, int devId,
     if (ret != 0)
         return ret;
 
-#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
-    (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
-    Sha512_SetTransform();
-#endif
 #ifdef WOLFSSL_HASH_KEEP
     sha512->msg = NULL;
     sha512->len = 0;
@@ -898,7 +967,11 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 le
 #if defined(LITTLE_ENDIAN_ORDER)
 #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
     (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
+    #ifdef WC_C_DYNAMIC_FALLBACK
+        if (sha512->sha_method == SHA512_C)
+    #else
         if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+    #endif
 #endif
         {
 #if !defined(WOLFSSL_ESP32_CRYPT) || \
@@ -934,11 +1007,14 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 le
 #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
     (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
-    #ifdef WC_NO_INTERNAL_FUNCTION_POINTERS
+    #ifdef WC_C_DYNAMIC_FALLBACK
+        if (sha512->sha_method != SHA512_C)
+    #elif defined(WC_NO_INTERNAL_FUNCTION_POINTERS)
         if (sha_method != SHA512_C)
     #else
         if (Transform_Sha512_Len_p != NULL)
     #endif
+
         {
             word32 blocksLen = len & ~((word32)WC_SHA512_BLOCK_SIZE-1);
@@ -964,7 +1040,11 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 le
 #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
     (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
+    #ifdef WC_C_DYNAMIC_FALLBACK
+        if (sha512->sha_method == SHA512_C)
+    #else
         if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+    #endif
         {
             ByteReverseWords64(sha512->buffer, sha512->buffer,
                                WC_SHA512_BLOCK_SIZE);
@@ -1094,7 +1174,11 @@ static WC_INLINE int Sha512Final(wc_Sha512* sha512)
 #if defined(LITTLE_ENDIAN_ORDER)
 #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
     (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
+    #ifdef WC_C_DYNAMIC_FALLBACK
+        if (sha512->sha_method == SHA512_C)
+    #else
         if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+    #endif
 #endif
         {
@@ -1140,7 +1224,11 @@ static WC_INLINE int Sha512Final(wc_Sha512* sha512)
 #if defined(LITTLE_ENDIAN_ORDER)
 #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
     (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
+    #ifdef WC_C_DYNAMIC_FALLBACK
+        if (sha512->sha_method == SHA512_C)
+    #else
         if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+    #endif
 #endif
 #if !defined(WOLFSSL_ESP32_CRYPT) || \
     defined(NO_WOLFSSL_ESP32_CRYPT_HASH) || \
@@ -1159,7 +1247,11 @@ static WC_INLINE int Sha512Final(wc_Sha512* sha512)
 #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
     (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
+    #ifdef WC_C_DYNAMIC_FALLBACK
+        if (sha512->sha_method != SHA512_C)
+    #else
         if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags))
+    #endif
             ByteReverseWords64(&(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
                                &(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
                                WC_SHA512_BLOCK_SIZE - WC_SHA512_PAD_SIZE);
@@ -1362,15 +1454,14 @@ int wc_Sha512Transform(wc_Sha512* sha, const unsigned char* data)
         return MEMORY_E;
 #endif
 
-#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
-    (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
-    Sha512_SetTransform();
-#endif
-
 #if defined(LITTLE_ENDIAN_ORDER)
 #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
     (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
+    #ifdef WC_C_DYNAMIC_FALLBACK
+    if (sha->sha_method == SHA512_C)
+    #else
     if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+    #endif
 #endif
     {
         ByteReverseWords64((word64*)data, (word64*)data,
@@ -1463,6 +1554,16 @@ static int InitSha384(wc_Sha384* sha384)
     sha384->loLen = 0;
     sha384->hiLen = 0;
 
+#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
+    (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
+#ifdef WC_C_DYNAMIC_FALLBACK
+    sha384->sha_method = 0;
+    Sha512_SetTransform(&sha384->sha_method);
+#else
+    Sha512_SetTransform();
+#endif
+#endif
+
 #if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) && \
     !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA384)
     /* HW needs to be carefully initialized, taking into account soft copy.
@@ -1605,11 +1706,6 @@ int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId)
         return ret;
     }
 
-#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
-    (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
-    Sha512_SetTransform();
-#endif
-
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
     ret = wolfAsync_DevCtxInit(&sha384->asyncDev, WOLFSSL_ASYNC_MARKER_SHA384,
                                                        sha384->heap, devId);
diff --git a/wolfssl/wolfcrypt/sha512.h b/wolfssl/wolfcrypt/sha512.h
index 7db8c93f2..96bbc28e4 100644
--- a/wolfssl/wolfcrypt/sha512.h
+++ b/wolfssl/wolfcrypt/sha512.h
@@ -151,6 +151,9 @@ struct wc_Sha512 {
 #ifdef USE_INTEL_SPEEDUP
     const byte* data;
 #endif
+#ifdef WC_C_DYNAMIC_FALLBACK
+    int sha_method;
+#endif
 #ifdef WOLFSSL_ASYNC_CRYPT
     WC_ASYNC_DEV asyncDev;
 #endif /* WOLFSSL_ASYNC_CRYPT */