From 4c709f1f2c7396e2b26cfcad699825fc9f1e94db Mon Sep 17 00:00:00 2001 From: David Garske Date: Wed, 25 Sep 2019 12:47:12 -0700 Subject: [PATCH 1/4] Improvements to SHA-1, SHA-256 and MD5 performance: * Added detection for buffer alignment to avoid memcpy. * Added MD5 and SHA-1 support for XTRANSFORM_LEN to process blocks. * Cleanups for consistency between algorithms and code commenting. * Enhancement for NXP MMCAU to process more than one block at a time. * Improved MMCAU performance: SHA-1 by 35%, SHA-256 by 20% and MD5 by 78%. ``` NXP K64 w/MMCAU after: MD5 8 MB took 1.000 seconds, 7.910 MB/s SHA 4 MB took 1.005 seconds, 3.644 MB/s SHA-256 2 MB took 1.006 seconds, 2.306 MB/s NXP K64 w/MMCAU before: MD5 4 MB took 1.004 seconds, 4.450 MB/s SHA 3 MB took 1.006 seconds, 2.670 MB/s SHA-256 2 MB took 1.008 seconds, 1.913 MB/s ``` --- IDE/ROWLEY-CROSSWORKS-ARM/kinetis_hw.c | 1 - IDE/ROWLEY-CROSSWORKS-ARM/test_main.c | 2 +- IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h | 1 + wolfcrypt/src/md5.c | 438 +++++++++++------- wolfcrypt/src/port/Espressif/esp32_sha.c | 10 +- wolfcrypt/src/sha.c | 210 ++++++--- wolfcrypt/src/sha256.c | 319 +++++++------ .../wolfcrypt/port/Espressif/esp32-crypt.h | 6 +- 8 files changed, 605 insertions(+), 382 deletions(-) diff --git a/IDE/ROWLEY-CROSSWORKS-ARM/kinetis_hw.c b/IDE/ROWLEY-CROSSWORKS-ARM/kinetis_hw.c index 7e5f2d36d..d2d1f0370 100644 --- a/IDE/ROWLEY-CROSSWORKS-ARM/kinetis_hw.c +++ b/IDE/ROWLEY-CROSSWORKS-ARM/kinetis_hw.c @@ -89,7 +89,6 @@ /* Note: You will also need to update the UART clock gate in hw_uart_init (SIM_SCGC1_UART5_MASK) */ /* Note: TWR-K60 is UART3, PTC17 */ -/* Note: FRDM-K64 is UART4, PTE24 */ /* Note: FRDM-K64 is UART4, PTE24 or UART0 PTB17 for OpenOCD (SIM_SCGC4_UART0_MASK)*/ /* Note: TWR-K64 is UART5, PTE8 */ /* Note: FRDM-K82F is LPUART0 A2, LPUART4 PTC15 */ diff --git a/IDE/ROWLEY-CROSSWORKS-ARM/test_main.c b/IDE/ROWLEY-CROSSWORKS-ARM/test_main.c index 6fb50311f..c3d306e46 100644 --- 
a/IDE/ROWLEY-CROSSWORKS-ARM/test_main.c +++ b/IDE/ROWLEY-CROSSWORKS-ARM/test_main.c @@ -65,7 +65,7 @@ void main(void) test_num++; } while(args.return_code == 0); - /*Print this again for redundancy*/ + /* Print this again for redundancy */ #ifdef WOLFSSL_FRDM_K64_JENKINS printf("\n&&&&&&&&&&&&&& done &&&&&&&&&&&&&\n"); delay_us(1000000); diff --git a/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h b/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h index 63279511e..e99aa7aff 100644 --- a/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h +++ b/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h @@ -208,6 +208,7 @@ extern "C" { /* MD5 */ #undef NO_MD5 #if 1 +#else #define NO_MD5 #endif diff --git a/wolfcrypt/src/md5.c b/wolfcrypt/src/md5.c index ad0ea18f7..1b5a86e6e 100644 --- a/wolfcrypt/src/md5.c +++ b/wolfcrypt/src/md5.c @@ -22,7 +22,7 @@ #ifdef HAVE_CONFIG_H - #include +#include #endif #include @@ -30,7 +30,7 @@ #if !defined(NO_MD5) #if defined(WOLFSSL_TI_HASH) - /* #include included by wc_port.c */ +/* #include included by wc_port.c */ #else @@ -40,197 +40,225 @@ #include #ifdef NO_INLINE - #include +#include #else - #define WOLFSSL_MISC_INCLUDED - #include +#define WOLFSSL_MISC_INCLUDED +#include #endif /* Hardware Acceleration */ #if defined(STM32_HASH) - /* Supports CubeMX HAL or Standard Peripheral Library */ - #define HAVE_MD5_CUST_API +/* Supports CubeMX HAL or Standard Peripheral Library */ +#define HAVE_MD5_CUST_API - int wc_InitMd5_ex(wc_Md5* md5, void* heap, int devId) - { - if (md5 == NULL) { - return BAD_FUNC_ARG; - } - - (void)devId; - (void)heap; - - wc_Stm32_Hash_Init(&md5->stmCtx); - - return 0; +int wc_InitMd5_ex(wc_Md5* md5, void* heap, int devId) +{ + if (md5 == NULL) { + return BAD_FUNC_ARG; } - int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) - { - int ret; + (void)devId; + (void)heap; - if (md5 == NULL || (data == NULL && len > 0)) { - return BAD_FUNC_ARG; - } + wc_Stm32_Hash_Init(&md5->stmCtx); - ret = wolfSSL_CryptHwMutexLock(); - if (ret == 0) { - ret = 
wc_Stm32_Hash_Update(&md5->stmCtx, HASH_AlgoSelection_MD5, - data, len); - wolfSSL_CryptHwMutexUnLock(); - } - return ret; + return 0; +} + +int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) +{ + int ret; + + if (md5 == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; } - int wc_Md5Final(wc_Md5* md5, byte* hash) - { - int ret; - - if (md5 == NULL || hash == NULL) { - return BAD_FUNC_ARG; - } - - ret = wolfSSL_CryptHwMutexLock(); - if (ret == 0) { - ret = wc_Stm32_Hash_Final(&md5->stmCtx, HASH_AlgoSelection_MD5, - hash, WC_MD5_DIGEST_SIZE); - wolfSSL_CryptHwMutexUnLock(); - } - - (void)wc_InitMd5(md5); /* reset state */ - - return ret; + ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + ret = wc_Stm32_Hash_Update(&md5->stmCtx, HASH_AlgoSelection_MD5, + data, len); + wolfSSL_CryptHwMutexUnLock(); } + return ret; +} + +int wc_Md5Final(wc_Md5* md5, byte* hash) +{ + int ret; + + if (md5 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + ret = wc_Stm32_Hash_Final(&md5->stmCtx, HASH_AlgoSelection_MD5, + hash, WC_MD5_DIGEST_SIZE); + wolfSSL_CryptHwMutexUnLock(); + } + + (void)wc_InitMd5(md5); /* reset state */ + + return ret; +} #elif defined(FREESCALE_MMCAU_SHA) - #include "cau_api.h" - #define XTRANSFORM(S,B) Transform((S), (B)) - static int Transform(wc_Md5* md5, byte* data) - { - int ret = wolfSSL_CryptHwMutexLock(); - if(ret == 0) { - #ifdef FREESCALE_MMCAU_CLASSIC_SHA - cau_md5_hash_n(data, 1, (unsigned char*)md5->digest); - #else - MMCAU_MD5_HashN(data, 1, (uint32_t*)md5->digest); - #endif - wolfSSL_CryptHwMutexUnLock(); - } - return ret; +#ifdef FREESCALE_MMCAU_CLASSIC_SHA + #include "cau_api.h" +#else + #include "fsl_mmcau.h" +#endif + +#define XTRANSFORM(S,B) Transform((S), (B)) +#define XTRANSFORM_LEN(S,B,L) Transform_Len((S), (B), (L)) + +static int Transform(wc_Md5* md5, const byte* data) +{ + int ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { +#ifdef 
FREESCALE_MMCAU_CLASSIC_SHA + cau_md5_hash_n((byte*)data, 1, (unsigned char*)md5->digest); +#else + MMCAU_MD5_HashN((byte*)data, 1, (uint32_t*)md5->digest); +#endif + wolfSSL_CryptHwMutexUnLock(); } + return ret; +} + +static int Transform_Len(wc_Md5* md5, const byte* data, word32 len) +{ + int ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { +#ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_md5_hash_n((byte*)data, len / WC_MD5_BLOCK_SIZE, + (unsigned char*)md5->digest); +#else + MMCAU_MD5_HashN((byte*)data, len / WC_MD5_BLOCK_SIZE, + (uint32_t*)md5->digest); +#endif + wolfSSL_CryptHwMutexUnLock(); + } + return ret; +} #elif defined(WOLFSSL_PIC32MZ_HASH) - #include - #define HAVE_MD5_CUST_API +#include +#define HAVE_MD5_CUST_API #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH) - /* functions implemented in wolfcrypt/src/port/caam/caam_sha.c */ - #define HAVE_MD5_CUST_API +/* functions implemented in wolfcrypt/src/port/caam/caam_sha.c */ +#define HAVE_MD5_CUST_API #else - #define NEED_SOFT_MD5 +#define NEED_SOFT_MD5 #endif /* End Hardware Acceleration */ +#ifndef WC_MD5_DATA_ALIGNMENT + /* default to 32-bit alignement */ + #define WC_MD5_DATA_ALIGNMENT 4 +#endif #ifdef NEED_SOFT_MD5 - #define XTRANSFORM(S,B) Transform((S)) +#define XTRANSFORM(S,B) Transform((S),(B)) - #define F1(x, y, z) (z ^ (x & (y ^ z))) - #define F2(x, y, z) F1(z, x, y) - #define F3(x, y, z) (x ^ y ^ z) - #define F4(x, y, z) (y ^ (x | ~z)) +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) - #define MD5STEP(f, w, x, y, z, data, s) \ +#define MD5STEP(f, w, x, y, z, data, s) \ w = rotlFixed(w + f(x, y, z) + data, s) + x - static int Transform(wc_Md5* md5) - { - /* Copy context->state[] to working vars */ - word32 a = md5->digest[0]; - word32 b = md5->digest[1]; - word32 c = md5->digest[2]; - word32 d = md5->digest[3]; +static int Transform(wc_Md5* md5, const byte* data) +{ + word32* buffer = 
(word32*)data; + /* Copy context->state[] to working vars */ + word32 a = md5->digest[0]; + word32 b = md5->digest[1]; + word32 c = md5->digest[2]; + word32 d = md5->digest[3]; - MD5STEP(F1, a, b, c, d, md5->buffer[0] + 0xd76aa478, 7); - MD5STEP(F1, d, a, b, c, md5->buffer[1] + 0xe8c7b756, 12); - MD5STEP(F1, c, d, a, b, md5->buffer[2] + 0x242070db, 17); - MD5STEP(F1, b, c, d, a, md5->buffer[3] + 0xc1bdceee, 22); - MD5STEP(F1, a, b, c, d, md5->buffer[4] + 0xf57c0faf, 7); - MD5STEP(F1, d, a, b, c, md5->buffer[5] + 0x4787c62a, 12); - MD5STEP(F1, c, d, a, b, md5->buffer[6] + 0xa8304613, 17); - MD5STEP(F1, b, c, d, a, md5->buffer[7] + 0xfd469501, 22); - MD5STEP(F1, a, b, c, d, md5->buffer[8] + 0x698098d8, 7); - MD5STEP(F1, d, a, b, c, md5->buffer[9] + 0x8b44f7af, 12); - MD5STEP(F1, c, d, a, b, md5->buffer[10] + 0xffff5bb1, 17); - MD5STEP(F1, b, c, d, a, md5->buffer[11] + 0x895cd7be, 22); - MD5STEP(F1, a, b, c, d, md5->buffer[12] + 0x6b901122, 7); - MD5STEP(F1, d, a, b, c, md5->buffer[13] + 0xfd987193, 12); - MD5STEP(F1, c, d, a, b, md5->buffer[14] + 0xa679438e, 17); - MD5STEP(F1, b, c, d, a, md5->buffer[15] + 0x49b40821, 22); + MD5STEP(F1, a, b, c, d, buffer[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, buffer[1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, buffer[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, buffer[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, buffer[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, buffer[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, buffer[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, buffer[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, buffer[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, buffer[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, buffer[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, buffer[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, buffer[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, buffer[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, buffer[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, 
buffer[15] + 0x49b40821, 22); - MD5STEP(F2, a, b, c, d, md5->buffer[1] + 0xf61e2562, 5); - MD5STEP(F2, d, a, b, c, md5->buffer[6] + 0xc040b340, 9); - MD5STEP(F2, c, d, a, b, md5->buffer[11] + 0x265e5a51, 14); - MD5STEP(F2, b, c, d, a, md5->buffer[0] + 0xe9b6c7aa, 20); - MD5STEP(F2, a, b, c, d, md5->buffer[5] + 0xd62f105d, 5); - MD5STEP(F2, d, a, b, c, md5->buffer[10] + 0x02441453, 9); - MD5STEP(F2, c, d, a, b, md5->buffer[15] + 0xd8a1e681, 14); - MD5STEP(F2, b, c, d, a, md5->buffer[4] + 0xe7d3fbc8, 20); - MD5STEP(F2, a, b, c, d, md5->buffer[9] + 0x21e1cde6, 5); - MD5STEP(F2, d, a, b, c, md5->buffer[14] + 0xc33707d6, 9); - MD5STEP(F2, c, d, a, b, md5->buffer[3] + 0xf4d50d87, 14); - MD5STEP(F2, b, c, d, a, md5->buffer[8] + 0x455a14ed, 20); - MD5STEP(F2, a, b, c, d, md5->buffer[13] + 0xa9e3e905, 5); - MD5STEP(F2, d, a, b, c, md5->buffer[2] + 0xfcefa3f8, 9); - MD5STEP(F2, c, d, a, b, md5->buffer[7] + 0x676f02d9, 14); - MD5STEP(F2, b, c, d, a, md5->buffer[12] + 0x8d2a4c8a, 20); + MD5STEP(F2, a, b, c, d, buffer[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, buffer[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, buffer[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, buffer[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, buffer[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, buffer[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, buffer[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, buffer[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, buffer[9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, buffer[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, buffer[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, buffer[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, buffer[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, buffer[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, buffer[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, buffer[12] + 0x8d2a4c8a, 20); - MD5STEP(F3, a, b, c, d, md5->buffer[5] + 0xfffa3942, 4); - MD5STEP(F3, d, a, b, c, md5->buffer[8] + 0x8771f681, 11); - MD5STEP(F3, 
c, d, a, b, md5->buffer[11] + 0x6d9d6122, 16); - MD5STEP(F3, b, c, d, a, md5->buffer[14] + 0xfde5380c, 23); - MD5STEP(F3, a, b, c, d, md5->buffer[1] + 0xa4beea44, 4); - MD5STEP(F3, d, a, b, c, md5->buffer[4] + 0x4bdecfa9, 11); - MD5STEP(F3, c, d, a, b, md5->buffer[7] + 0xf6bb4b60, 16); - MD5STEP(F3, b, c, d, a, md5->buffer[10] + 0xbebfbc70, 23); - MD5STEP(F3, a, b, c, d, md5->buffer[13] + 0x289b7ec6, 4); - MD5STEP(F3, d, a, b, c, md5->buffer[0] + 0xeaa127fa, 11); - MD5STEP(F3, c, d, a, b, md5->buffer[3] + 0xd4ef3085, 16); - MD5STEP(F3, b, c, d, a, md5->buffer[6] + 0x04881d05, 23); - MD5STEP(F3, a, b, c, d, md5->buffer[9] + 0xd9d4d039, 4); - MD5STEP(F3, d, a, b, c, md5->buffer[12] + 0xe6db99e5, 11); - MD5STEP(F3, c, d, a, b, md5->buffer[15] + 0x1fa27cf8, 16); - MD5STEP(F3, b, c, d, a, md5->buffer[2] + 0xc4ac5665, 23); + MD5STEP(F3, a, b, c, d, buffer[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, buffer[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, buffer[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, buffer[14] + 0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, buffer[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, buffer[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, buffer[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, buffer[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, buffer[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, buffer[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, buffer[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, buffer[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, buffer[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, buffer[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, buffer[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, buffer[2] + 0xc4ac5665, 23); - MD5STEP(F4, a, b, c, d, md5->buffer[0] + 0xf4292244, 6); - MD5STEP(F4, d, a, b, c, md5->buffer[7] + 0x432aff97, 10); - MD5STEP(F4, c, d, a, b, md5->buffer[14] + 0xab9423a7, 15); - MD5STEP(F4, b, c, d, a, md5->buffer[5] + 0xfc93a039, 21); - MD5STEP(F4, a, b, c, d, md5->buffer[12] + 
0x655b59c3, 6); - MD5STEP(F4, d, a, b, c, md5->buffer[3] + 0x8f0ccc92, 10); - MD5STEP(F4, c, d, a, b, md5->buffer[10] + 0xffeff47d, 15); - MD5STEP(F4, b, c, d, a, md5->buffer[1] + 0x85845dd1, 21); - MD5STEP(F4, a, b, c, d, md5->buffer[8] + 0x6fa87e4f, 6); - MD5STEP(F4, d, a, b, c, md5->buffer[15] + 0xfe2ce6e0, 10); - MD5STEP(F4, c, d, a, b, md5->buffer[6] + 0xa3014314, 15); - MD5STEP(F4, b, c, d, a, md5->buffer[13] + 0x4e0811a1, 21); - MD5STEP(F4, a, b, c, d, md5->buffer[4] + 0xf7537e82, 6); - MD5STEP(F4, d, a, b, c, md5->buffer[11] + 0xbd3af235, 10); - MD5STEP(F4, c, d, a, b, md5->buffer[2] + 0x2ad7d2bb, 15); - MD5STEP(F4, b, c, d, a, md5->buffer[9] + 0xeb86d391, 21); + MD5STEP(F4, a, b, c, d, buffer[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, buffer[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, buffer[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, buffer[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, buffer[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, buffer[3] + 0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, buffer[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, buffer[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, buffer[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, buffer[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, buffer[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, buffer[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, buffer[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, buffer[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, buffer[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, buffer[9] + 0xeb86d391, 21); - /* Add the working vars back into digest state[] */ - md5->digest[0] += a; - md5->digest[1] += b; - md5->digest[2] += c; - md5->digest[3] += d; + /* Add the working vars back into digest state[] */ + md5->digest[0] += a; + md5->digest[1] += b; + md5->digest[2] += c; + md5->digest[3] += d; - return 0; - } + return 0; +} #endif /* NEED_SOFT_MD5 */ #ifndef HAVE_MD5_CUST_API @@ -277,17 +305,20 @@ int wc_InitMd5_ex(wc_Md5* md5, void* 
heap, int devId) #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5) ret = wolfAsync_DevCtxInit(&md5->asyncDev, WOLFSSL_ASYNC_MARKER_MD5, - md5->heap, devId); + md5->heap, devId); #else (void)devId; #endif return ret; } +/* do block size increments/updates */ int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) { int ret = 0; - byte* local; + word32 blocksLen; + byte* local; + word32* local32; if (md5 == NULL || (data == NULL && len > 0)) { return BAD_FUNC_ARG; @@ -295,36 +326,92 @@ int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5) if (md5->asyncDev.marker == WOLFSSL_ASYNC_MARKER_MD5) { - #if defined(HAVE_INTEL_QA) +#if defined(HAVE_INTEL_QA) return IntelQaSymMd5(&md5->asyncDev, NULL, data, len); - #endif +#endif } #endif /* WOLFSSL_ASYNC_CRYPT */ - /* do block size increments */ - local = (byte*)md5->buffer; - /* check that internal buffLen is valid */ if (md5->buffLen >= WC_MD5_BLOCK_SIZE) return BUFFER_E; - while (len) { - word32 add = min(len, WC_MD5_BLOCK_SIZE - md5->buffLen); - XMEMCPY(&local[md5->buffLen], data, add); + if (data == NULL && len == 0) { + /* valid, but do nothing */ + return 0; + } - md5->buffLen += add; - data += add; - len -= add; + /* add length for final */ + AddLength(md5, len); + + local = (byte*)md5->buffer; + local32 = md5->buffer; + + /* process any remainder from previous operation */ + if (md5->buffLen > 0) { + blocksLen = min(len, WC_MD5_BLOCK_SIZE - md5->buffLen); + XMEMCPY(&local[md5->buffLen], data, blocksLen); + + md5->buffLen += blocksLen; + data += blocksLen; + len -= blocksLen; if (md5->buffLen == WC_MD5_BLOCK_SIZE) { #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - ByteReverseWords(md5->buffer, md5->buffer, WC_MD5_BLOCK_SIZE); + ByteReverseWords(local32, local32, WC_MD5_BLOCK_SIZE); #endif - XTRANSFORM(md5, local); - AddLength(md5, WC_MD5_BLOCK_SIZE); + + ret = XTRANSFORM(md5, (const byte*)local); + if (ret != 0) + 
return ret; + md5->buffLen = 0; } } + + /* process blocks */ +#ifdef XTRANSFORM_LEN + /* get number of blocks */ + /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */ + /* len (masked by 0xFFFFFFC0) returns block aligned length */ + blocksLen = len & ~(WC_MD5_BLOCK_SIZE-1); + if (blocksLen > 0) { + /* Byte reversal performed in function if required. */ + XTRANSFORM_LEN(md5, data, blocksLen); + data += blocksLen; + len -= blocksLen; + } +#else + while (len >= WC_MD5_BLOCK_SIZE) { + /* optimization to avoid memcpy if data pointer is properly aligned */ + /* Big Endian requires byte swap, so can't use data directly */ + #if defined(WC_MD5_DATA_ALIGNMENT) && !defined(BIG_ENDIAN_ORDER) + if (((size_t)data % WC_MD5_DATA_ALIGNMENT) == 0) { + local32 = (word32*)data; + } + else + #endif + { + XMEMCPY(local32, data, WC_MD5_BLOCK_SIZE); + } + + data += WC_MD5_BLOCK_SIZE; + len -= WC_MD5_BLOCK_SIZE; + + #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + ByteReverseWords(local32, local32, WC_MD5_BLOCK_SIZE); + #endif + + ret = XTRANSFORM(md5, (const byte*)local32); + } +#endif /* XTRANSFORM_LEN */ + + /* save remainder */ + if (len > 0) { + XMEMCPY(local, data, len); + md5->buffLen = len; + } + return ret; } @@ -338,15 +425,14 @@ int wc_Md5Final(wc_Md5* md5, byte* hash) #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5) if (md5->asyncDev.marker == WOLFSSL_ASYNC_MARKER_MD5) { - #if defined(HAVE_INTEL_QA) +#if defined(HAVE_INTEL_QA) return IntelQaSymMd5(&md5->asyncDev, hash, NULL, WC_MD5_DIGEST_SIZE); - #endif +#endif } #endif /* WOLFSSL_ASYNC_CRYPT */ local = (byte*)md5->buffer; - AddLength(md5, md5->buffLen); /* before adding pads */ local[md5->buffLen++] = 0x80; /* add 1 */ /* pad with zeros */ @@ -354,9 +440,9 @@ int wc_Md5Final(wc_Md5* md5, byte* hash) XMEMSET(&local[md5->buffLen], 0, WC_MD5_BLOCK_SIZE - md5->buffLen); md5->buffLen += WC_MD5_BLOCK_SIZE - md5->buffLen; - #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) +#if 
defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) ByteReverseWords(md5->buffer, md5->buffer, WC_MD5_BLOCK_SIZE); - #endif +#endif XTRANSFORM(md5, local); md5->buffLen = 0; } @@ -367,7 +453,7 @@ int wc_Md5Final(wc_Md5* md5, byte* hash) #endif /* put lengths in bits */ - md5->hiLen = (md5->loLen >> (8*sizeof(md5->loLen) - 3)) + + md5->hiLen = (md5->loLen >> (8 * sizeof(md5->loLen) - 3)) + (md5->hiLen << 3); md5->loLen = md5->loLen << 3; @@ -441,7 +527,7 @@ int wc_Md5Copy(wc_Md5* src, wc_Md5* dst) ret = wc_Pic32HashCopy(&src->cache, &dst->cache); #endif #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) - dst->flags |= WC_HASH_FLAG_ISCOPY; + dst->flags |= WC_HASH_FLAG_ISCOPY; #endif return ret; diff --git a/wolfcrypt/src/port/Espressif/esp32_sha.c b/wolfcrypt/src/port/Espressif/esp32_sha.c index c60f31d23..57377aef6 100644 --- a/wolfcrypt/src/port/Espressif/esp32_sha.c +++ b/wolfcrypt/src/port/Espressif/esp32_sha.c @@ -279,7 +279,7 @@ static void esp_digest_state(WC_ESP32SHA* ctx, byte* hash, enum SHA_TYPE sha_typ /* * sha1 process */ -int esp_sha_process(struct wc_Sha* sha) +int esp_sha_process(struct wc_Sha* sha, const byte* data) { int ret = 0; @@ -287,7 +287,7 @@ int esp_sha_process(struct wc_Sha* sha) word32 SHA_START_REG = SHA_1_START_REG; - esp_process_block(&sha->ctx, SHA_START_REG, sha->buffer, + esp_process_block(&sha->ctx, SHA_START_REG, (const word32*)data, WC_SHA_BLOCK_SIZE); ESP_LOGV(TAG, "leave esp_sha_process"); @@ -322,7 +322,7 @@ int esp_sha_digest_process(struct wc_Sha* sha, byte blockproc) /* * sha256 process */ -int esp_sha256_process(struct wc_Sha256* sha) +int esp_sha256_process(struct wc_Sha256* sha, const byte* data) { int ret = 0; word32 SHA_START_REG = SHA_1_START_REG; @@ -332,8 +332,8 @@ int esp_sha256_process(struct wc_Sha256* sha) /* start register offset */ SHA_START_REG += (SHA2_256 << 4); - esp_process_block(&sha->ctx, SHA_START_REG, sha->buffer, - WC_SHA256_BLOCK_SIZE); + esp_process_block(&sha->ctx, SHA_START_REG, 
(const word32*)data, + WC_SHA256_BLOCK_SIZE); ESP_LOGV(TAG, "leave esp_sha256_process"); diff --git a/wolfcrypt/src/sha.c b/wolfcrypt/src/sha.c index 72f3af886..a601e9e28 100644 --- a/wolfcrypt/src/sha.c +++ b/wolfcrypt/src/sha.c @@ -205,7 +205,9 @@ #endif #define USE_SHA_SOFTWARE_IMPL /* Only for API's, actual transform is here */ - #define XTRANSFORM(S,B) Transform((S),(B)) + + #define XTRANSFORM(S,B) Transform((S),(B)) + #define XTRANSFORM_LEN(S,B,L) Transform_Len((S),(B),(L)) static int InitSha(wc_Sha* sha) { @@ -228,14 +230,29 @@ return ret; } - static int Transform(wc_Sha* sha, byte* data) + static int Transform(wc_Sha* sha, const byte* data) { int ret = wolfSSL_CryptHwMutexLock(); if(ret == 0) { #ifdef FREESCALE_MMCAU_CLASSIC_SHA - cau_sha1_hash_n(data, 1, sha->digest); + cau_sha1_hash_n((byte*)data, 1, sha->digest); #else - MMCAU_SHA1_HashN(data, 1, (uint32_t*)sha->digest); + MMCAU_SHA1_HashN((byte*)data, 1, (uint32_t*)sha->digest); + #endif + wolfSSL_CryptHwMutexUnLock(); + } + return ret; + } + + static int Transform_Len(wc_Sha* sha, const byte* data, word32 len) + { + int ret = wolfSSL_CryptHwMutexLock(); + if(ret == 0) { + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha1_hash_n((byte*)data, len/WC_SHA_BLOCK_SIZE, sha->digest); + #else + MMCAU_SHA1_HashN((byte*)data, len/WC_SHA_BLOCK_SIZE, + (uint32_t*)sha->digest); #endif wolfSSL_CryptHwMutexUnLock(); } @@ -280,12 +297,12 @@ return ret; } - + #elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \ !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH) - + /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */ - + #else /* Software implementation */ #define USE_SHA_SOFTWARE_IMPL @@ -312,6 +329,10 @@ #endif /* End Hardware Acceleration */ +#ifndef WC_SHA_DATA_ALIGNMENT + /* default to 32-bit alignement */ + #define WC_SHA_DATA_ALIGNMENT 4 +#endif /* Software implementation */ #ifdef USE_SHA_SOFTWARE_IMPL @@ -327,7 +348,7 @@ static WC_INLINE void AddLength(wc_Sha* sha, word32 len) #ifndef XTRANSFORM #define 
XTRANSFORM(S,B) Transform((S),(B)) - #define blk0(i) (W[i] = sha->buffer[i]) + #define blk0(i) (W[i] = *((word32*)&data[i*sizeof(word32)])) #define blk1(i) (W[(i)&15] = \ rotlFixed(W[((i)+13)&15]^W[((i)+8)&15]^W[((i)+2)&15]^W[(i)&15],1)) @@ -356,7 +377,7 @@ static WC_INLINE void AddLength(wc_Sha* sha, word32 len) #define R4(v,w,x,y,z,i) (z)+= f4((w),(x),(y)) + blk1((i)) + 0xCA62C1D6+ \ rotlFixed((v),5); (w) = rotlFixed((w),30); - static void Transform(wc_Sha* sha, byte* data) + static int Transform(wc_Sha* sha, const byte* data) { word32 W[WC_SHA_BLOCK_SIZE / sizeof(word32)]; @@ -431,6 +452,8 @@ static WC_INLINE void AddLength(wc_Sha* sha, word32 len) sha->digest[4] += e; (void)data; /* Not used */ + + return 0; } #endif /* !USE_CUSTOM_SHA_TRANSFORM */ @@ -466,17 +489,18 @@ int wc_InitSha_ex(wc_Sha* sha, void* heap, int devId) return ret; } +/* do block size increments/updates */ int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len) { + int ret = 0; + word32 blocksLen; byte* local; + word32* local32; - if (sha == NULL ||(data == NULL && len > 0)) { + if (sha == NULL || (data == NULL && len > 0)) { return BAD_FUNC_ARG; } - /* do block size increments */ - local = (byte*)sha->buffer; - #ifdef WOLF_CRYPTO_CB if (sha->devId != INVALID_DEVID) { int ret = wc_CryptoCb_ShaHash(sha, data, len, NULL); @@ -497,37 +521,107 @@ int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len) if (sha->buffLen >= WC_SHA_BLOCK_SIZE) return BUFFER_E; - while (len) { - word32 add = min(len, WC_SHA_BLOCK_SIZE - sha->buffLen); - XMEMCPY(&local[sha->buffLen], data, add); + if (data == NULL && len == 0) { + /* valid, but do nothing */ + return 0; + } - sha->buffLen += add; - data += add; - len -= add; + /* add length for final */ + AddLength(sha, len); + + local = (byte*)sha->buffer; + local32 = sha->buffer; + + /* process any remainder from previous operation */ + if (sha->buffLen > 0) { + blocksLen = min(len, WC_SHA_BLOCK_SIZE - sha->buffLen); + XMEMCPY(&local[sha->buffLen], data, 
blocksLen); + + sha->buffLen += blocksLen; + data += blocksLen; + len -= blocksLen; if (sha->buffLen == WC_SHA_BLOCK_SIZE) { -#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - ByteReverseWords(sha->buffer, sha->buffer, WC_SHA_BLOCK_SIZE); -#endif -#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ - defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) - XTRANSFORM(sha, local); -#else - if(sha->ctx.mode == ESP32_SHA_INIT){ + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + ByteReverseWords(local32, local32, WC_SHA_BLOCK_SIZE); + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha->ctx.mode == ESP32_SHA_INIT) { esp_sha_try_hw_lock(&sha->ctx); } - if(sha->ctx.mode == ESP32_SHA_SW){ - XTRANSFORM(sha, local); + if (sha->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha, (const byte*)local); } else { - esp_sha_process(sha); + esp_sha_process(sha, (const byte*)local); } -#endif - AddLength(sha, WC_SHA_BLOCK_SIZE); + #else + ret = XTRANSFORM(sha, (const byte*)local); + #endif + if (ret != 0) + return ret; + sha->buffLen = 0; } } - return 0; + /* process blocks */ +#ifdef XTRANSFORM_LEN + /* get number of blocks */ + /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */ + /* len (masked by 0xFFFFFFC0) returns block aligned length */ + blocksLen = len & ~(WC_SHA_BLOCK_SIZE-1); + if (blocksLen > 0) { + /* Byte reversal performed in function if required.
*/ + XTRANSFORM_LEN(sha, data, blocksLen); + data += blocksLen; + len -= blocksLen; + } +#else + while (len >= WC_SHA_BLOCK_SIZE) { + /* optimization to avoid memcpy if data pointer is properly aligned */ + /* Little Endian requires byte swap, so can't use data directly */ + #if defined(WC_SHA_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER) + if (((size_t)data % WC_SHA_DATA_ALIGNMENT) == 0) { + local32 = (word32*)data; + } + else + #endif + { + XMEMCPY(local32, data, WC_SHA_BLOCK_SIZE); + } + + data += WC_SHA_BLOCK_SIZE; + len -= WC_SHA_BLOCK_SIZE; + + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + ByteReverseWords(local32, local32, WC_SHA_BLOCK_SIZE); + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha->ctx.mode == ESP32_SHA_INIT){ + esp_sha_try_hw_lock(&sha->ctx); + } + if (sha->ctx.mode == ESP32_SHA_SW){ + ret = XTRANSFORM(sha, (const byte*)local32); + } else { + esp_sha_process(sha, (const byte*)local32); + } + #else + ret = XTRANSFORM(sha, (const byte*)local32); + #endif + } +#endif /* XTRANSFORM_LEN */ + + /* save remainder */ + if (len > 0) { + XMEMCPY(local, data, len); + sha->buffLen = len; + } + + return ret; } int wc_ShaFinalRaw(wc_Sha* sha, byte* hash) @@ -552,6 +646,7 @@ int wc_ShaFinalRaw(wc_Sha* sha, byte* hash) int wc_ShaFinal(wc_Sha* sha, byte* hash) { + int ret; byte* local; if (sha == NULL || hash == NULL) { @@ -576,8 +671,6 @@ int wc_ShaFinal(wc_Sha* sha, byte* hash) } #endif /* WOLFSSL_ASYNC_CRYPT */ - AddLength(sha, sha->buffLen); /* before adding pads */ - local[sha->buffLen++] = 0x80; /* add 1 */ /* pad with zeros */ @@ -585,22 +678,26 @@ int wc_ShaFinal(wc_Sha* sha, byte* hash) XMEMSET(&local[sha->buffLen], 0, WC_SHA_BLOCK_SIZE - sha->buffLen); sha->buffLen += WC_SHA_BLOCK_SIZE - sha->buffLen; -#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) ByteReverseWords(sha->buffer, 
sha->buffer, WC_SHA_BLOCK_SIZE); -#endif -#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ - defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) - XTRANSFORM(sha, local); -#else - if(sha->ctx.mode == ESP32_SHA_INIT){ + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha->ctx.mode == ESP32_SHA_INIT) { esp_sha_try_hw_lock(&sha->ctx); } - if(sha->ctx.mode == ESP32_SHA_SW){ - XTRANSFORM(sha, local); + if (sha->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha, (const byte*)local); } else { - esp_sha_process(sha); + esp_sha_process(sha, (const byte*)local); } -#endif + #else + ret = XTRANSFORM(sha, (const byte*)local); + #endif + if (ret != 0) + return ret; + sha->buffLen = 0; } XMEMSET(&local[sha->buffLen], 0, WC_SHA_PAD_SIZE - sha->buffLen); @@ -625,26 +722,29 @@ int wc_ShaFinal(wc_Sha* sha, byte* hash) 2 * sizeof(word32)); #endif -#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ - defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) - XTRANSFORM(sha, local); -#else - if(sha->ctx.mode == ESP32_SHA_INIT){ +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha->ctx.mode == ESP32_SHA_INIT) { esp_sha_try_hw_lock(&sha->ctx); } - if(sha->ctx.mode == ESP32_SHA_SW){ - XTRANSFORM(sha, local); + if (sha->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha, (const byte*)local); } else { esp_sha_digest_process(sha, 1); } +#else + ret = XTRANSFORM(sha, (const byte*)local); #endif #ifdef LITTLE_ENDIAN_ORDER ByteReverseWords(sha->digest, sha->digest, WC_SHA_DIGEST_SIZE); #endif + XMEMCPY(hash, sha->digest, WC_SHA_DIGEST_SIZE); - return InitSha(sha); /* reset state */ + (void)InitSha(sha); /* reset state */ + + return ret; } #endif /* USE_SHA_SOFTWARE_IMPL */ @@ -707,7 +807,7 @@ int wc_ShaGetHash(wc_Sha* sha, byte* hash) sha->ctx.mode = ESP32_SHA_SW; #endif - + } return ret; } diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index f48a412ee..23dbbc410 100644 --- a/wolfcrypt/src/sha256.c 
+++ b/wolfcrypt/src/sha256.c @@ -165,7 +165,7 @@ !defined(WOLFSSL_AFALG_HASH) && !defined(WOLFSSL_DEVCRYPTO_HASH) && \ (!defined(WOLFSSL_ESP32WROOM32_CRYPT) || defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)) && \ (!defined(WOLFSSL_RENESAS_TSIP_CRYPT) || defined(NO_WOLFSSL_RENESAS_TSIP_HASH)) - + static int InitSha256(wc_Sha256* sha256) { int ret = 0; @@ -201,6 +201,9 @@ static int InitSha256(wc_Sha256* sha256) /* in case intel instructions aren't available, plus we need the K[] global */ #define NEED_SOFT_SHA256 + /* requires 128-bit alignment */ + #define WC_SHA256_DATA_ALIGNMENT 16 + /***** Intel AVX1/AVX2 Macro Control Structure @@ -258,43 +261,44 @@ static int InitSha256(wc_Sha256* sha256) */ /* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha256 */ - static int Transform_Sha256(wc_Sha256* sha256); + static int Transform_Sha256(wc_Sha256* sha256, const byte* data); #ifdef __cplusplus extern "C" { #endif #if defined(HAVE_INTEL_AVX1) - extern int Transform_Sha256_AVX1(wc_Sha256 *sha256); + extern int Transform_Sha256_AVX1(wc_Sha256 *sha256, const byte* data); extern int Transform_Sha256_AVX1_Len(wc_Sha256* sha256, const byte* data, word32 len); #endif #if defined(HAVE_INTEL_AVX2) - extern int Transform_Sha256_AVX2(wc_Sha256 *sha256); + extern int Transform_Sha256_AVX2(wc_Sha256 *sha256, const byte* data); extern int Transform_Sha256_AVX2_Len(wc_Sha256* sha256, const byte* data, word32 len); #ifdef HAVE_INTEL_RORX - extern int Transform_Sha256_AVX1_RORX(wc_Sha256 *sha256); + extern int Transform_Sha256_AVX1_RORX(wc_Sha256 *sha256, const byte* data); extern int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256, const byte* data, word32 len); - extern int Transform_Sha256_AVX2_RORX(wc_Sha256 *sha256); + extern int Transform_Sha256_AVX2_RORX(wc_Sha256 *sha256, const byte* data); extern int Transform_Sha256_AVX2_RORX_Len(wc_Sha256* sha256, const byte* data, word32 len); - #endif - #endif + #endif /* HAVE_INTEL_RORX */ + #endif /* HAVE_INTEL_AVX2 */ #ifdef __cplusplus } 
/* extern "C" */ #endif - static int (*Transform_Sha256_p)(wc_Sha256* sha256); + static int (*Transform_Sha256_p)(wc_Sha256* sha256, const byte* data); /* = _Transform_Sha256 */ static int (*Transform_Sha256_Len_p)(wc_Sha256* sha256, const byte* data, word32 len); /* = NULL */ static int transform_check = 0; static word32 intel_flags; - #define XTRANSFORM(S) (*Transform_Sha256_p)((S)) + + #define XTRANSFORM(S, D) (*Transform_Sha256_p)((S),(D)) #define XTRANSFORM_LEN(S, D, L) (*Transform_Sha256_Len_p)((S),(D),(L)) static void Sha256_SetTransform(void) @@ -390,7 +394,7 @@ static int InitSha256(wc_Sha256* sha256) #include "fsl_mmcau.h" #endif - #define XTRANSFORM(S) Transform_Sha256((S)) + #define XTRANSFORM(S, D) Transform_Sha256((S),(D)) #define XTRANSFORM_LEN(S, D, L) Transform_Sha256_Len((S),(D),(L)) int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) @@ -418,14 +422,31 @@ static int InitSha256(wc_Sha256* sha256) return ret; } - static int Transform_Sha256(wc_Sha256* sha256) + static int Transform_Sha256(wc_Sha256* sha256, const byte* data) { int ret = wolfSSL_CryptHwMutexLock(); if (ret == 0) { #ifdef FREESCALE_MMCAU_CLASSIC_SHA - cau_sha256_hash_n((byte*)sha256->buffer, 1, sha256->digest); + cau_sha256_hash_n((byte*)data, 1, sha256->digest); #else - MMCAU_SHA256_HashN((byte*)sha256->buffer, 1, sha256->digest); + MMCAU_SHA256_HashN((byte*)data, 1, sha256->digest); + #endif + wolfSSL_CryptHwMutexUnLock(); + } + return ret; + } + + static int Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, + word32 len) + { + int ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha256_hash_n((byte*)data, len/WC_SHA256_BLOCK_SIZE, + sha256->digest); + #else + MMCAU_SHA256_HashN((byte*)data, len/WC_SHA256_BLOCK_SIZE, + sha256->digest); #endif wolfSSL_CryptHwMutexUnLock(); } @@ -553,12 +574,12 @@ static int InitSha256(wc_Sha256* sha256) return ret; } - + #elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \ 
!defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH) - + /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */ - + #else #define NEED_SOFT_SHA256 @@ -593,6 +614,11 @@ static int InitSha256(wc_Sha256* sha256) } #endif /* End Hardware Acceleration */ +#ifndef WC_SHA256_DATA_ALIGNMENT + /* default is 32-bit alignment required */ + #define WC_SHA256_DATA_ALIGNMENT 4 +#endif + #ifdef NEED_SOFT_SHA256 static const ALIGN32 word32 K[64] = { @@ -639,6 +665,10 @@ static int InitSha256(wc_Sha256* sha256) #define g(i) S[(6-i) & 7] #define h(i) S[(7-i) & 7] + #ifndef XTRANSFORM + #define XTRANSFORM(S, D) Transform_Sha256((S),(D)) + #endif + #ifndef SHA256_MANY_REGISTERS #define RND(j) \ t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + W[i+j]; \ @@ -646,12 +676,7 @@ static int InitSha256(wc_Sha256* sha256) d(j) += t0; \ h(j) = t0 + t1 - #ifndef XTRANSFORM - #define XTRANSFORM(S) Transform_Sha256((S)) - #define XTRANSFORM_LEN(S, D, L) Transform_Sha256_Len((S),(D),(L)) - #endif - - static int Transform_Sha256(wc_Sha256* sha256) + static int Transform_Sha256(wc_Sha256* sha256, const byte* data) { word32 S[8], t0, t1; int i; @@ -680,7 +705,7 @@ static int InitSha256(wc_Sha256* sha256) S[i] = sha256->digest[i]; for (i = 0; i < 16; i++) - W[i] = sha256->buffer[i]; + W[i] = *((word32*)&data[i*sizeof(word32)]); for (i = 16; i < WC_SHA256_BLOCK_SIZE; i++) W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16]; @@ -713,7 +738,7 @@ static int InitSha256(wc_Sha256* sha256) } #else /* SHA256 version that keeps all data in registers */ - #define SCHED1(j) (W[j] = sha256->buffer[j]) + #define SCHED1(j) (W[j] = *((word32*)&data[j*sizeof(word32)])) #define SCHED(j) ( \ W[ j & 15] += \ Gamma1(W[(j-2) & 15])+ \ @@ -732,12 +757,7 @@ static int InitSha256(wc_Sha256* sha256) d(j) += t0; \ h(j) = t0 + t1 - #ifndef XTRANSFORM - #define XTRANSFORM(S) Transform_Sha256((S)) - #define XTRANSFORM_LEN(S, D, L) Transform_Sha256_Len((S),(D),(L)) - #endif - - static int 
Transform_Sha256(wc_Sha256* sha256) + static int Transform_Sha256(wc_Sha256* sha256, const byte* data) { word32 S[8], t0, t1; int i; @@ -788,14 +808,18 @@ static int InitSha256(wc_Sha256* sha256) static WC_INLINE void AddLength(wc_Sha256* sha256, word32 len) { word32 tmp = sha256->loLen; - if ((sha256->loLen += len) < tmp) + if ((sha256->loLen += len) < tmp) { sha256->hiLen++; /* carry low to high */ + } } + /* do block size increments/updates */ static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) { int ret = 0; - byte* local; + word32 blocksLen; + byte* local; + word32* local32; if (sha256 == NULL || (data == NULL && len > 0)) { return BAD_FUNC_ARG; @@ -806,117 +830,128 @@ static int InitSha256(wc_Sha256* sha256) return 0; } + /* check that internal buffLen is valid */ + if (sha256->buffLen >= WC_SHA256_BLOCK_SIZE) { + return BUFFER_E; + } + + /* add length for final */ AddLength(sha256, len); - /* do block size increments */ local = (byte*)sha256->buffer; + local32 = sha256->buffer; - /* check that internal buffLen is valid */ - if (sha256->buffLen >= WC_SHA256_BLOCK_SIZE) - return BUFFER_E; - + /* process any remainder from previous operation */ if (sha256->buffLen > 0) { - word32 add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen); - XMEMCPY(&local[sha256->buffLen], data, add); + blocksLen = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen); + XMEMCPY(&local[sha256->buffLen], data, blocksLen); - sha256->buffLen += add; - data += add; - len -= add; + sha256->buffLen += blocksLen; + data += blocksLen; + len -= blocksLen; if (sha256->buffLen == WC_SHA256_BLOCK_SIZE) { - #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) - #endif + #endif { - 
ByteReverseWords(sha256->buffer, sha256->buffer, - WC_SHA256_BLOCK_SIZE); + ByteReverseWords(local32, local32, WC_SHA256_BLOCK_SIZE); } - #endif + #endif - #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ - defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) - ret = XTRANSFORM(sha256); - #else - if(sha256->ctx.mode == ESP32_SHA_INIT) { + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha256->ctx.mode == ESP32_SHA_INIT){ esp_sha_try_hw_lock(&sha256->ctx); } - if(sha256->ctx.mode == ESP32_SHA_SW){ - ret = XTRANSFORM(sha256); + if (sha256->ctx.mode == ESP32_SHA_SW){ + ret = XTRANSFORM(sha256, (const byte*)local); } else { - esp_sha256_process(sha256); + esp_sha256_process(sha256, (const byte*)local); } - #endif + #else + ret = XTRANSFORM(sha256, (const byte*)local); + #endif + if (ret == 0) sha256->buffLen = 0; else - len = 0; + len = 0; /* error */ } } - #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) - if (Transform_Sha256_Len_p != NULL) { - word32 blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1); + /* process blocks */ + #ifdef XTRANSFORM_LEN + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + if (Transform_Sha256_Len_p != NULL) + #endif + { + /* get number of blocks */ + /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */ + /* len (masked by 0xFFFFFFC0) returns block aligned length */ + blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1); if (blocksLen > 0) { - /* Byte reversal performed in function if required. 
*/ + /* Byte reversal and alignment handled in function if required */ XTRANSFORM_LEN(sha256, data, blocksLen); data += blocksLen; len -= blocksLen; } } + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) else - #endif - #if !defined(LITTLE_ENDIAN_ORDER) || defined(FREESCALE_MMCAU_SHA) || \ - defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + #endif + #endif /* XTRANSFORM_LEN */ + #if !defined(XTRANSFORM_LEN) || defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) { while (len >= WC_SHA256_BLOCK_SIZE) { - XMEMCPY(local, data, WC_SHA256_BLOCK_SIZE); + /* optimization to avoid memcpy if data pointer is properly aligned */ + /* Intel transform function requires use of sha256->buffer */ + /* Little Endian requires byte swap, so can't use data directly */ + #if defined(WC_SHA256_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER) && \ + !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2) + if (((size_t)data % WC_SHA256_DATA_ALIGNMENT) == 0) { + local32 = (word32*)data; + } + else + #endif + { + XMEMCPY(local32, data, WC_SHA256_BLOCK_SIZE); + } data += WC_SHA256_BLOCK_SIZE; len -= WC_SHA256_BLOCK_SIZE; - #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif { - ByteReverseWords(sha256->buffer, sha256->buffer, - WC_SHA256_BLOCK_SIZE); + ByteReverseWords(local32, local32, WC_SHA256_BLOCK_SIZE); } #endif - ret = XTRANSFORM(sha256); - if (ret != 0) - break; - } - } - #else - { - while (len >= WC_SHA256_BLOCK_SIZE) { - XMEMCPY(local, data, WC_SHA256_BLOCK_SIZE); - data += WC_SHA256_BLOCK_SIZE; - len -= WC_SHA256_BLOCK_SIZE; - - ByteReverseWords(sha256->buffer, sha256->buffer, - WC_SHA256_BLOCK_SIZE); - #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ - defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) - ret = XTRANSFORM(sha256); - #else - if(sha256->ctx.mode == 
ESP32_SHA_INIT){ + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha256->ctx.mode == ESP32_SHA_INIT){ esp_sha_try_hw_lock(&sha256->ctx); } - - if(sha256->ctx.mode == ESP32_SHA_SW){ - ret = XTRANSFORM(sha256); + if (sha256->ctx.mode == ESP32_SHA_SW){ + ret = XTRANSFORM(sha256, (const byte*)local32); } else { - esp_sha256_process(sha256); + esp_sha256_process(sha256, (const byte*)local32); } - #endif + #else + ret = XTRANSFORM(sha256, (const byte*)local32); + #endif + if (ret != 0) break; } } #endif + /* save remainder */ if (len > 0) { XMEMCPY(local, data, len); sha256->buffLen = len; @@ -959,13 +994,14 @@ static int InitSha256(wc_Sha256* sha256) { int ret; - byte* local = (byte*)sha256->buffer; + byte* local; if (sha256 == NULL) { return BAD_FUNC_ARG; } - local[sha256->buffLen++] = 0x80; /* add 1 */ + local = (byte*)sha256->buffer; + local[sha256->buffLen++] = 0x80; /* add 1 */ /* pad with zeros */ if (sha256->buffLen > WC_SHA256_PAD_SIZE) { @@ -973,36 +1009,36 @@ static int InitSha256(wc_Sha256* sha256) WC_SHA256_BLOCK_SIZE - sha256->buffLen); sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen; - { #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) - if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) #endif - { - ByteReverseWords(sha256->buffer, sha256->buffer, - WC_SHA256_BLOCK_SIZE); - } - #endif + { + ByteReverseWords(sha256->buffer, sha256->buffer, + WC_SHA256_BLOCK_SIZE); + } + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha256->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha256->ctx); + } + if (sha256->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha256, (const byte*)local); + } else { + ret = esp_sha256_process(sha256, (const byte*)local); } - #if 
!defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ - defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) - ret = XTRANSFORM(sha256); #else - if(sha256->ctx.mode == ESP32_SHA_INIT){ - esp_sha_try_hw_lock(&sha256->ctx); - } - if(sha256->ctx.mode == ESP32_SHA_SW){ - ret = XTRANSFORM(sha256); - } else { - ret = esp_sha256_process(sha256); - } + ret = XTRANSFORM(sha256, (const byte*)local); #endif if (ret != 0) return ret; sha256->buffLen = 0; } - XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_PAD_SIZE - sha256->buffLen); + XMEMSET(&local[sha256->buffLen], 0, + WC_SHA256_PAD_SIZE - sha256->buffLen); /* put lengths in bits */ sha256->hiLen = (sha256->loLen >> (8 * sizeof(sha256->loLen) - 3)) + @@ -1012,12 +1048,12 @@ static int InitSha256(wc_Sha256* sha256) /* store lengths */ #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) - if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) #endif - { - ByteReverseWords(sha256->buffer, sha256->buffer, - WC_SHA256_BLOCK_SIZE); - } + { + ByteReverseWords(sha256->buffer, sha256->buffer, + WC_SHA256_BLOCK_SIZE); + } #endif /* ! length ordering dependent on digest endian type ! 
*/ XMEMCPY(&local[WC_SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32)); @@ -1028,30 +1064,31 @@ static int InitSha256(wc_Sha256* sha256) defined(HAVE_INTEL_AVX2) /* Kinetis requires only these bytes reversed */ #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) - if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags)) + if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags)) #endif - { - ByteReverseWords( - &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)], - &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)], - 2 * sizeof(word32)); - } + { + ByteReverseWords( + &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)], + &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)], + 2 * sizeof(word32)); + } #endif - #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ - defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) - return XTRANSFORM(sha256); + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha256->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha256->ctx); + } + if (sha256->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha256, (const byte*)local); + } else { + ret = esp_sha256_digest_process(sha256, 1); + } #else - if(sha256->ctx.mode == ESP32_SHA_INIT){ - esp_sha_try_hw_lock(&sha256->ctx); - } - if(sha256->ctx.mode == ESP32_SHA_SW){ - return XTRANSFORM(sha256); - } else { - ret = esp_sha256_digest_process(sha256, 1); - } - return ret; + ret = XTRANSFORM(sha256, (const byte*)local); #endif + + return ret; } int wc_Sha256FinalRaw(wc_Sha256* sha256, byte* hash) @@ -1439,10 +1476,10 @@ void wc_Sha256Free(wc_Sha256* sha256) #elif defined(WOLFSSL_DEVCRYPTO_HASH) /* implemented in wolfcrypt/src/port/devcrypto/devcrypt_hash.c */ - + #elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \ !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH) - + /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */ #else diff --git a/wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h b/wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h 
index 0bb2492a9..433066af1 100644 --- a/wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h +++ b/wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h @@ -114,12 +114,12 @@ void esp_sha_hw_unlock( void ); struct wc_Sha; int esp_sha_digest_process(struct wc_Sha* sha, byte bockprocess); -int esp_sha_process(struct wc_Sha* sha); +int esp_sha_process(struct wc_Sha* sha, const byte* data); #ifndef NO_SHA256 struct wc_Sha256; int esp_sha256_digest_process(struct wc_Sha256* sha, byte bockprocess); - int esp_sha256_process(struct wc_Sha256* sha); + int esp_sha256_process(struct wc_Sha256* sha, const byte* data); #endif #if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384) @@ -140,7 +140,7 @@ struct fp_int; int esp_mp_mul(struct fp_int* X, struct fp_int* Y, struct fp_int* Z); int esp_mp_exptmod(struct fp_int* G, struct fp_int* X, word32 Xbits, struct fp_int* P, struct fp_int* Y); -int esp_mp_mulmod(struct fp_int* X, struct fp_int* Y, struct fp_int* M, +int esp_mp_mulmod(struct fp_int* X, struct fp_int* Y, struct fp_int* M, struct fp_int* Z); #endif /* NO_RSA || HAVE_ECC*/ From 6bfe6761d8ebea8bdaaccff1760aeb8f3328d790 Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 26 Sep 2019 11:49:33 -0700 Subject: [PATCH 2/4] Disable the new hashing aligned build option by default. Does not increase performance... the memcpy is faster than the alignment check on modern CPU's. Embedded systems may benefit from this though, so leaving support for it in place. 
--- wolfcrypt/src/md5.c | 6 ------ wolfcrypt/src/sha.c | 6 ------ wolfcrypt/src/sha256.c | 8 -------- 3 files changed, 20 deletions(-) diff --git a/wolfcrypt/src/md5.c b/wolfcrypt/src/md5.c index 1b5a86e6e..9a18720f8 100644 --- a/wolfcrypt/src/md5.c +++ b/wolfcrypt/src/md5.c @@ -154,14 +154,8 @@ static int Transform_Len(wc_Md5* md5, const byte* data, word32 len) #define HAVE_MD5_CUST_API #else #define NEED_SOFT_MD5 - #endif /* End Hardware Acceleration */ -#ifndef WC_MD5_DATA_ALIGNMENT - /* default to 32-bit alignement */ - #define WC_MD5_DATA_ALIGNMENT 4 -#endif - #ifdef NEED_SOFT_MD5 #define XTRANSFORM(S,B) Transform((S),(B)) diff --git a/wolfcrypt/src/sha.c b/wolfcrypt/src/sha.c index a601e9e28..dd51bd002 100644 --- a/wolfcrypt/src/sha.c +++ b/wolfcrypt/src/sha.c @@ -326,14 +326,8 @@ return ret; } - #endif /* End Hardware Acceleration */ -#ifndef WC_SHA_DATA_ALIGNMENT - /* default to 32-bit alignement */ - #define WC_SHA_DATA_ALIGNMENT 4 -#endif - /* Software implementation */ #ifdef USE_SHA_SOFTWARE_IMPL diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index 23dbbc410..d8d43662f 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -201,9 +201,6 @@ static int InitSha256(wc_Sha256* sha256) /* in case intel instructions aren't available, plus we need the K[] global */ #define NEED_SOFT_SHA256 - /* requires 128-bit alignment */ - #define WC_SHA256_DATA_ALIGNMENT 16 - /***** Intel AVX1/AVX2 Macro Control Structure @@ -614,11 +611,6 @@ static int InitSha256(wc_Sha256* sha256) } #endif /* End Hardware Acceleration */ -#ifndef WC_SHA256_DATA_ALIGNMENT - /* default is 32-bit alignment required */ - #define WC_SHA256_DATA_ALIGNMENT 4 -#endif - #ifdef NEED_SOFT_SHA256 static const ALIGN32 word32 K[64] = { From 78f6bbcdb8b25b6533ca89255138f14ae2a7e8e1 Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 26 Sep 2019 11:53:24 -0700 Subject: [PATCH 3/4] Adjusted the alignement macro to use `WC_HASH_DATA_ALIGNMENT` for shared settings across hash 
algos. --- wolfcrypt/src/md5.c | 4 ++-- wolfcrypt/src/sha.c | 4 ++-- wolfcrypt/src/sha256.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/wolfcrypt/src/md5.c b/wolfcrypt/src/md5.c index 9a18720f8..94c8152e1 100644 --- a/wolfcrypt/src/md5.c +++ b/wolfcrypt/src/md5.c @@ -379,8 +379,8 @@ int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) while (len >= WC_MD5_BLOCK_SIZE) { /* optimization to avoid memcpy if data pointer is properly aligned */ /* Big Endian requires byte swap, so can't use data directly */ - #if defined(WC_MD5_DATA_ALIGNMENT) && !defined(BIG_ENDIAN_ORDER) - if (((size_t)data % WC_MD5_DATA_ALIGNMENT) == 0) { + #if defined(WC_HASH_DATA_ALIGNMENT) && !defined(BIG_ENDIAN_ORDER) + if (((size_t)data % WC_HASH_DATA_ALIGNMENT) == 0) { local32 = (word32*)data; } else diff --git a/wolfcrypt/src/sha.c b/wolfcrypt/src/sha.c index dd51bd002..842aa0d62 100644 --- a/wolfcrypt/src/sha.c +++ b/wolfcrypt/src/sha.c @@ -576,8 +576,8 @@ int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len) while (len >= WC_SHA_BLOCK_SIZE) { /* optimization to avoid memcpy if data pointer is properly aligned */ /* Little Endian requires byte swap, so can't use data directly */ - #if defined(WC_SHA_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER) - if (((size_t)data % WC_SHA_DATA_ALIGNMENT) == 0) { + #if defined(WC_HASH_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER) + if (((size_t)data % WC_HASH_DATA_ALIGNMENT) == 0) { local32 = (word32*)data; } else diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index d8d43662f..ad7e5805c 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -900,9 +900,9 @@ static int InitSha256(wc_Sha256* sha256) /* optimization to avoid memcpy if data pointer is properly aligned */ /* Intel transform function requires use of sha256->buffer */ /* Little Endian requires byte swap, so can't use data directly */ - #if defined(WC_SHA256_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER) && \ + #if 
defined(WC_HASH_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER) && \ !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2) - if (((size_t)data % WC_SHA256_DATA_ALIGNMENT) == 0) { + if (((size_t)data % WC_HASH_DATA_ALIGNMENT) == 0) { local32 = (word32*)data; } else From b47039b7ecaa44e242ccc08d2c8936dcdb0a33d8 Mon Sep 17 00:00:00 2001 From: David Garske Date: Fri, 27 Sep 2019 09:22:18 -0700 Subject: [PATCH 4/4] Fix for possible unused local32 warning. --- wolfcrypt/src/md5.c | 7 +++---- wolfcrypt/src/sha.c | 5 ++--- wolfcrypt/src/sha256.c | 8 ++++---- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/wolfcrypt/src/md5.c b/wolfcrypt/src/md5.c index 94c8152e1..243a997e4 100644 --- a/wolfcrypt/src/md5.c +++ b/wolfcrypt/src/md5.c @@ -311,8 +311,7 @@ int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) { int ret = 0; word32 blocksLen; - byte* local; - word32* local32; + byte* local; if (md5 == NULL || (data == NULL && len > 0)) { return BAD_FUNC_ARG; @@ -339,7 +338,6 @@ int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) AddLength(md5, len); local = (byte*)md5->buffer; - local32 = md5->buffer; /* process any remainder from previous operation */ if (md5->buffLen > 0) { @@ -352,7 +350,7 @@ int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) if (md5->buffLen == WC_MD5_BLOCK_SIZE) { #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - ByteReverseWords(local32, local32, WC_MD5_BLOCK_SIZE); + ByteReverseWords(md5->buffer, md5->buffer, WC_MD5_BLOCK_SIZE); #endif ret = XTRANSFORM(md5, (const byte*)local); @@ -377,6 +375,7 @@ int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) } #else while (len >= WC_MD5_BLOCK_SIZE) { + word32* local32 = md5->buffer; /* optimization to avoid memcpy if data pointer is properly aligned */ /* Big Endian requires byte swap, so can't use data directly */ #if defined(WC_HASH_DATA_ALIGNMENT) && !defined(BIG_ENDIAN_ORDER) diff --git a/wolfcrypt/src/sha.c b/wolfcrypt/src/sha.c index 
842aa0d62..b2dee4323 100644 --- a/wolfcrypt/src/sha.c +++ b/wolfcrypt/src/sha.c @@ -489,7 +489,6 @@ int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len) int ret = 0; word32 blocksLen; byte* local; - word32* local32; if (sha == NULL || (data == NULL && len > 0)) { return BAD_FUNC_ARG; @@ -524,7 +523,6 @@ int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len) AddLength(sha, len); local = (byte*)sha->buffer; - local32 = sha->buffer; /* process any remainder from previous operation */ if (sha->buffLen > 0) { @@ -537,7 +535,7 @@ int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len) if (sha->buffLen == WC_SHA_BLOCK_SIZE) { #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - ByteReverseWords(local32, local32, WC_SHA_BLOCK_SIZE); + ByteReverseWords(sha->buffer, sha->buffer, WC_SHA_BLOCK_SIZE); #endif #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ @@ -574,6 +572,7 @@ int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len) } #else while (len >= WC_SHA_BLOCK_SIZE) { + word32* local32 = sha->buffer; /* optimization to avoid memcpy if data pointer is properly aligned */ /* Little Endian requires byte swap, so can't use data directly */ #if defined(WC_HASH_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER) diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index ad7e5805c..9948c6c11 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -810,8 +810,7 @@ static int InitSha256(wc_Sha256* sha256) { int ret = 0; word32 blocksLen; - byte* local; - word32* local32; + byte* local; if (sha256 == NULL || (data == NULL && len > 0)) { return BAD_FUNC_ARG; @@ -831,7 +830,6 @@ static int InitSha256(wc_Sha256* sha256) AddLength(sha256, len); local = (byte*)sha256->buffer; - local32 = sha256->buffer; /* process any remainder from previous operation */ if (sha256->buffLen > 0) { @@ -848,7 +846,8 @@ static int InitSha256(wc_Sha256* sha256) if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) #endif { - ByteReverseWords(local32, 
local32, WC_SHA256_BLOCK_SIZE); + ByteReverseWords(sha256->buffer, sha256->buffer, + WC_SHA256_BLOCK_SIZE); } #endif @@ -897,6 +896,7 @@ static int InitSha256(wc_Sha256* sha256) #if !defined(XTRANSFORM_LEN) || defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) { while (len >= WC_SHA256_BLOCK_SIZE) { + word32* local32 = sha256->buffer; /* optimization to avoid memcpy if data pointer is properly aligned */ /* Intel transform function requires use of sha256->buffer */ /* Little Endian requires byte swap, so can't use data directly */