From 502cbc384714f0c1da426d031a798e56b5563c7a Mon Sep 17 00:00:00 2001 From: David Garske Date: Mon, 6 Jun 2022 12:11:06 -0700 Subject: [PATCH] Fix for STM32U5 hash/crypto support. ZD 14305. --- wolfcrypt/src/md5.c | 2 +- wolfcrypt/src/port/st/stm32.c | 193 +++++++++++++++++++++++------- wolfcrypt/src/sha.c | 2 +- wolfcrypt/src/sha256.c | 4 +- wolfssl/openssl/sha.h | 13 +- wolfssl/wolfcrypt/port/st/stm32.h | 21 +++- 6 files changed, 179 insertions(+), 56 deletions(-) diff --git a/wolfcrypt/src/md5.c b/wolfcrypt/src/md5.c index 53f011baa..f5d85c28b 100644 --- a/wolfcrypt/src/md5.c +++ b/wolfcrypt/src/md5.c @@ -78,7 +78,7 @@ int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) ret = wolfSSL_CryptHwMutexLock(); if (ret == 0) { ret = wc_Stm32_Hash_Update(&md5->stmCtx, HASH_AlgoSelection_MD5, - data, len); + data, len, WC_MD5_BLOCK_SIZE); wolfSSL_CryptHwMutexUnLock(); } return ret; diff --git a/wolfcrypt/src/port/st/stm32.c b/wolfcrypt/src/port/st/stm32.c index 3879ba177..240f8604c 100644 --- a/wolfcrypt/src/port/st/stm32.c +++ b/wolfcrypt/src/port/st/stm32.c @@ -46,9 +46,7 @@ #ifdef STM32_HASH -#ifdef WOLFSSL_STM32L4 - #define HASH_STR_NBW HASH_STR_NBLW -#endif +/* #define DEBUG_STM32_HASH */ /* User can override STM32_HASH_CLOCK_ENABLE and STM32_HASH_CLOCK_DISABLE */ #ifndef STM32_HASH_CLOCK_ENABLE @@ -77,8 +75,22 @@ #define STM32_HASH_CLOCK_DISABLE(ctx) wc_Stm32_Hash_Clock_Disable(ctx) #endif + /* STM32 Port Internal Functions */ -static WC_INLINE void wc_Stm32_Hash_SaveContext(STM32_HASH_Context* ctx) +static void wc_Stm32_Hash_NumValidBits(word32 len) +{ + /* calculate number of valid bits in last word */ + /* NBLW = 0x00 (all 32-bits are valid) */ + word32 nbvalidbytesdata = (len % STM32_HASH_REG_SIZE); + HASH->STR &= ~HASH_STR_NBW; + HASH->STR |= (8 * nbvalidbytesdata) & HASH_STR_NBW; + +#ifdef DEBUG_STM32_HASH + printf("STM Valid Last bits (%d)\n", 8 * nbvalidbytesdata); +#endif +} + +static void wc_Stm32_Hash_SaveContext(STM32_HASH_Context* ctx) { int i; @@ -89,13 +101,46 @@ static WC_INLINE void wc_Stm32_Hash_SaveContext(STM32_HASH_Context* ctx) for (i=0; iHASH_CSR[i] = HASH->CSR[i]; } + +#ifdef DEBUG_STM32_HASH + printf("STM Save CR %lx, IMR %lx, STR %lx\n", + HASH->CR, HASH->IMR, HASH->STR); +#endif } -static WC_INLINE int wc_Stm32_Hash_RestoreContext(STM32_HASH_Context* ctx) +static void wc_Stm32_Hash_RestoreContext(STM32_HASH_Context* ctx, int algo) { int i; - if (ctx->HASH_CR != 0) { + if (ctx->HASH_CR == 0) { + /* init content */ + + #if defined(HASH_IMR_DINIE) && defined(HASH_IMR_DCIE) + /* enable IRQ's */ + HASH->IMR |= (HASH_IMR_DINIE | HASH_IMR_DCIE); + #endif + + /* reset the control register */ + HASH->CR &= ~(HASH_CR_ALGO | HASH_CR_MODE | HASH_CR_DATATYPE + #ifdef HASH_CR_LKEY + | HASH_CR_LKEY + #endif + ); + + /* configure algorithm, mode and data type */ + HASH->CR |= (algo | HASH_ALGOMODE_HASH | HASH_DATATYPE_8B); + + /* reset HASH processor */ + HASH->CR |= HASH_CR_INIT; + + /* by default mark all bits valid */ + wc_Stm32_Hash_NumValidBits(0); + +#ifdef DEBUG_STM32_HASH + printf("STM Init algo %x\n", algo); +#endif + } + else { /* restore context registers */ HASH->IMR = ctx->HASH_IMR; HASH->STR = ctx->HASH_STR; @@ -108,12 +153,15 @@ static WC_INLINE int wc_Stm32_Hash_RestoreContext(STM32_HASH_Context* ctx) for (i=0; iCSR[i] = ctx->HASH_CSR[i]; } - return 1; + +#ifdef DEBUG_STM32_HASH + printf("STM Restore CR %lx, IMR %lx, STR %lx\n", + HASH->CR, HASH->IMR, HASH->STR); +#endif } - return 0; } -static WC_INLINE void wc_Stm32_Hash_GetDigest(byte* hash, int digestSize) +static void wc_Stm32_Hash_GetDigest(byte* hash, int digestSize) { word32 digest[HASH_MAX_DIGEST/sizeof(word32)]; @@ -137,17 +185,35 @@ static WC_INLINE void wc_Stm32_Hash_GetDigest(byte* hash, int digestSize) ByteReverseWords(digest, digest, digestSize); XMEMCPY(hash, digest, digestSize); + +#ifdef DEBUG_STM32_HASH + { + word32 i; + printf("STM Digest %d\n", digestSize); + for (i=0; iSR & HASH_SR_BUSY) && ++timeout < STM32_HASH_TIMEOUT) { + (void)stmCtx; + + /* wait until hash digest is complete */ + while ((HASH->SR & HASH_SR_BUSY) && + #ifdef HASH_IMR_DCIE + (HASH->SR & HASH_SR_DCIS) == 0 && + #endif + ++timeout < STM32_HASH_TIMEOUT) { + }; + +#ifdef DEBUG_STM32_HASH + printf("STM Wait done %d, HASH->SR %lx\n", timeout, HASH->SR); +#endif - } /* verify timeout did not occur */ if (timeout >= STM32_HASH_TIMEOUT) { return WC_TIMEOUT_E; @@ -155,22 +221,58 @@ static WC_INLINE int wc_Stm32_Hash_WaitDone(void) return 0; } +static void wc_Stm32_Hash_Data(STM32_HASH_Context* stmCtx, word32 len) +{ + word32 i, blocks; + if (len > stmCtx->buffLen) + len = stmCtx->buffLen; + + /* calculate number of 32-bit blocks */ + blocks = ((len + STM32_HASH_REG_SIZE-1) / STM32_HASH_REG_SIZE); +#ifdef DEBUG_STM32_HASH + printf("STM DIN %d blocks\n", blocks); +#endif + for (i=0; ibuffer[i]); + #endif + HASH->DIN = stmCtx->buffer[i]; + } + stmCtx->loLen += len; /* total */ + stmCtx->buffLen -= len; + if (stmCtx->buffLen > 0) { + XMEMMOVE(stmCtx->buffer, (byte*)stmCtx->buffer+len, stmCtx->buffLen); + } +} + + +/* STM32 Port Exposed Functions */ void wc_Stm32_Hash_Init(STM32_HASH_Context* stmCtx) { /* clear context */ + /* this also gets called after finish */ XMEMSET(stmCtx, 0, sizeof(STM32_HASH_Context)); } int wc_Stm32_Hash_Update(STM32_HASH_Context* stmCtx, word32 algo, - const byte* data, int len) + const byte* data, word32 len, word32 blockSize) { int ret = 0; byte* local = (byte*)stmCtx->buffer; int wroteToFifo = 0; + const word32 fifoSz = (STM32_HASH_FIFO_SIZE * STM32_HASH_REG_SIZE); + + if (blockSize > fifoSz) + blockSize = fifoSz; + +#ifdef DEBUG_STM32_HASH + printf("STM Hash Update: algo %x, len %d, blockSz %d\n", + algo, len, blockSize); +#endif /* check that internal buffLen is valid */ - if (stmCtx->buffLen >= STM32_HASH_REG_SIZE) { + if (stmCtx->buffLen > blockSize) { return BUFFER_E; } @@ -178,36 +280,38 @@ int wc_Stm32_Hash_Update(STM32_HASH_Context* stmCtx, word32 algo, STM32_HASH_CLOCK_ENABLE(stmCtx); /* restore hash context or init as new hash */ - if (wc_Stm32_Hash_RestoreContext(stmCtx) == 0) { - /* reset the control register */ - HASH->CR &= ~(HASH_CR_ALGO | HASH_CR_DATATYPE | HASH_CR_MODE); + wc_Stm32_Hash_RestoreContext(stmCtx, algo); - /* configure algorithm, mode and data type */ - HASH->CR |= (algo | HASH_ALGOMODE_HASH | HASH_DATATYPE_8B); - - /* reset HASH processor */ - HASH->CR |= HASH_CR_INIT; - } - - /* write 4-bytes at a time into FIFO */ + /* write blocks to FIFO */ while (len) { - word32 add = min(len, STM32_HASH_REG_SIZE - stmCtx->buffLen); + word32 fillBlockSz = blockSize, add; + + /* if FIFO already has bytes written then fill remainder first */ + if (stmCtx->fifoBytes > 0) { + fillBlockSz -= stmCtx->fifoBytes; + stmCtx->fifoBytes = 0; + } + + add = min(len, fillBlockSz - stmCtx->buffLen); XMEMCPY(&local[stmCtx->buffLen], data, add); stmCtx->buffLen += add; data += add; len -= add; - if (stmCtx->buffLen == STM32_HASH_REG_SIZE) { + if (len > 0 && stmCtx->buffLen == fillBlockSz) { + wc_Stm32_Hash_Data(stmCtx, stmCtx->buffLen); wroteToFifo = 1; - HASH->DIN = *(word32*)stmCtx->buffer; - - stmCtx->loLen += STM32_HASH_REG_SIZE; - stmCtx->buffLen = 0; } } if (wroteToFifo) { + /* If we wrote a block send one more 32-bit to FIFO to trigger + * start. We cannot leave 16 deep FIFO filled before saving off + * context */ + wc_Stm32_Hash_Data(stmCtx, 4); + stmCtx->fifoBytes += 4; + /* save hash state for next operation */ wc_Stm32_Hash_SaveContext(stmCtx); } @@ -219,33 +323,34 @@ int wc_Stm32_Hash_Update(STM32_HASH_Context* stmCtx, word32 algo, } int wc_Stm32_Hash_Final(STM32_HASH_Context* stmCtx, word32 algo, - byte* hash, int digestSize) + byte* hash, word32 digestSize) { int ret = 0; - word32 nbvalidbitsdata = 0; + +#ifdef DEBUG_STM32_HASH + printf("STM Hash Final: algo %x, digestSz %d\n", algo, digestSize); +#endif /* turn on hash clock */ STM32_HASH_CLOCK_ENABLE(stmCtx); - /* restore hash state */ - wc_Stm32_Hash_RestoreContext(stmCtx); + /* restore hash context or init as new hash */ + wc_Stm32_Hash_RestoreContext(stmCtx, algo); /* finish reading any trailing bytes into FIFO */ if (stmCtx->buffLen > 0) { - HASH->DIN = *(word32*)stmCtx->buffer; - stmCtx->loLen += stmCtx->buffLen; + /* send remainder of data */ + wc_Stm32_Hash_Data(stmCtx, stmCtx->buffLen); } /* calculate number of valid bits in last word */ - nbvalidbitsdata = 8 * (stmCtx->loLen % STM32_HASH_REG_SIZE); - HASH->STR &= ~HASH_STR_NBW; - HASH->STR |= nbvalidbitsdata; + wc_Stm32_Hash_NumValidBits(stmCtx->loLen + stmCtx->buffLen); /* start hash processor */ HASH->STR |= HASH_STR_DCAL; /* wait for hash done */ - ret = wc_Stm32_Hash_WaitDone(); + ret = wc_Stm32_Hash_WaitDone(stmCtx); if (ret == 0) { /* read message digest */ wc_Stm32_Hash_GetDigest(hash, digestSize); diff --git a/wolfcrypt/src/sha.c b/wolfcrypt/src/sha.c index 463fb6572..8113997fa 100644 --- a/wolfcrypt/src/sha.c +++ b/wolfcrypt/src/sha.c @@ -137,7 +137,7 @@ ret = wolfSSL_CryptHwMutexLock(); if (ret == 0) { ret = wc_Stm32_Hash_Update(&sha->stmCtx, HASH_AlgoSelection_SHA1, - data, len); + data, len, WC_SHA_BLOCK_SIZE); wolfSSL_CryptHwMutexUnLock(); } return ret; diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index 0b231669b..6da015153 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -567,7 +567,7 @@ static int InitSha256(wc_Sha256* sha256) ret = wolfSSL_CryptHwMutexLock(); if (ret == 0) { ret = wc_Stm32_Hash_Update(&sha256->stmCtx, - HASH_AlgoSelection_SHA256, data, len); + HASH_AlgoSelection_SHA256, data, len, WC_SHA256_BLOCK_SIZE); wolfSSL_CryptHwMutexUnLock(); } return ret; @@ -1405,7 +1405,7 @@ static int InitSha256(wc_Sha256* sha256) ret = wolfSSL_CryptHwMutexLock(); if (ret == 0) { ret = wc_Stm32_Hash_Update(&sha224->stmCtx, - HASH_AlgoSelection_SHA224, data, len); + HASH_AlgoSelection_SHA224, data, len, WC_SHA224_BLOCK_SIZE); wolfSSL_CryptHwMutexUnLock(); } return ret; diff --git a/wolfssl/openssl/sha.h b/wolfssl/openssl/sha.h index 17af1ed2b..f85702aee 100644 --- a/wolfssl/openssl/sha.h +++ b/wolfssl/openssl/sha.h @@ -93,6 +93,13 @@ typedef WOLFSSL_SHA_CTX SHA_CTX; #endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */ #endif /* !NO_SHA */ +/* adder for HW crypto */ +#ifdef STM32_HASH +#define CTX_SHA2_HW_ADDER 30 +#else +#define CTX_SHA2_HW_ADDER 0 +#endif + #ifdef WOLFSSL_SHA224 /* Using ALIGN16 because when AES-NI is enabled digest and buffer in Sha256 @@ -100,7 +107,8 @@ typedef WOLFSSL_SHA_CTX SHA_CTX; * to Sha224, is expected to also be 16 byte aligned addresses. */ typedef struct WOLFSSL_SHA224_CTX { /* big enough to hold wolfcrypt Sha224, but check on init */ - ALIGN16 void* holder[(272 + WC_ASYNC_DEV_SIZE) / sizeof(void*)]; + ALIGN16 void* holder[(274 + CTX_SHA2_HW_ADDER + WC_ASYNC_DEV_SIZE) / + sizeof(void*)]; } WOLFSSL_SHA224_CTX; WOLFSSL_API int wolfSSL_SHA224_Init(WOLFSSL_SHA224_CTX* sha); @@ -133,7 +141,8 @@ typedef WOLFSSL_SHA224_CTX SHA224_CTX; * to Sha256, is expected to also be 16 byte aligned addresses. */ typedef struct WOLFSSL_SHA256_CTX { /* big enough to hold wolfcrypt Sha256, but check on init */ - ALIGN16 void* holder[(272 + WC_ASYNC_DEV_SIZE) / sizeof(void*)]; + ALIGN16 void* holder[(274 + CTX_SHA2_HW_ADDER + WC_ASYNC_DEV_SIZE) / + sizeof(void*)]; } WOLFSSL_SHA256_CTX; WOLFSSL_API int wolfSSL_SHA256_Init(WOLFSSL_SHA256_CTX* sha256); diff --git a/wolfssl/wolfcrypt/port/st/stm32.h b/wolfssl/wolfcrypt/port/st/stm32.h index 78112328b..3c3738e1d 100644 --- a/wolfssl/wolfcrypt/port/st/stm32.h +++ b/wolfssl/wolfcrypt/port/st/stm32.h @@ -60,6 +60,11 @@ /* STM32 register size in bytes */ #define STM32_HASH_REG_SIZE 4 +#if defined(WOLFSSL_STM32U5) +#define STM32_HASH_FIFO_SIZE 16 /* FIFO is 16 deep 32-bits wide */ +#else +#define STM32_HASH_FIFO_SIZE 1 +#endif /* STM32 Hash Context */ typedef struct { @@ -70,19 +75,20 @@ typedef struct { uint32_t HASH_CSR[HASH_CR_SIZE]; /* Hash state / buffers */ - word32 buffer[STM32_HASH_REG_SIZE / sizeof(word32)]; /* partial word buffer */ + word32 buffer[STM32_HASH_FIFO_SIZE]; /* partial word buffer */ word32 buffLen; /* partial word remain */ word32 loLen; /* total update bytes (only lsb 6-bits is used for nbr valid bytes in last word) */ + int fifoBytes; /* number of currently filled FIFO bytes */ } STM32_HASH_Context; /* API's */ void wc_Stm32_Hash_Init(STM32_HASH_Context* stmCtx); int wc_Stm32_Hash_Update(STM32_HASH_Context* stmCtx, word32 algo, - const byte* data, int len); + const byte* data, word32 len, word32 blockSize); int wc_Stm32_Hash_Final(STM32_HASH_Context* stmCtx, word32 algo, - byte* hash, int digestSize); + byte* hash, word32 digestSize); #endif /* STM32_HASH */ @@ -92,7 +98,8 @@ int wc_Stm32_Hash_Final(STM32_HASH_Context* stmCtx, word32 algo, #ifndef NO_AES #if !defined(STM32_CRYPTO_AES_GCM) && (defined(WOLFSSL_STM32F4) || \ defined(WOLFSSL_STM32F7) || defined(WOLFSSL_STM32L4) || \ - defined(WOLFSSL_STM32L5) || defined(WOLFSSL_STM32H7)) + defined(WOLFSSL_STM32L5) || defined(WOLFSSL_STM32H7) || \ + defined(WOLFSSL_STM32U5)) /* Hardware supports AES GCM acceleration */ #define STM32_CRYPTO_AES_GCM #endif @@ -102,7 +109,8 @@ int wc_Stm32_Hash_Final(STM32_HASH_Context* stmCtx, word32 algo, #define CRYP AES1 #define STM32_HAL_V2 #endif - #if defined(WOLFSSL_STM32L4) || defined(WOLFSSL_STM32L5) + #if defined(WOLFSSL_STM32L4) || defined(WOLFSSL_STM32L5) || \ + defined(WOLFSSL_STM32U5) #ifdef WOLFSSL_STM32L4 #define STM32_CRYPTO_AES_ONLY /* crypto engine only supports AES */ #endif @@ -114,7 +122,8 @@ int wc_Stm32_Hash_Final(STM32_HASH_Context* stmCtx, word32 algo, /* Detect newer CubeMX crypto HAL (HAL_CRYP_Encrypt / HAL_CRYP_Decrypt) */ #if !defined(STM32_HAL_V2) && defined(CRYP_AES_GCM) && \ - (defined(WOLFSSL_STM32F7) || defined(WOLFSSL_STM32L5) || defined(WOLFSSL_STM32H7)) + (defined(WOLFSSL_STM32F7) || defined(WOLFSSL_STM32L5) || \ + defined(WOLFSSL_STM32H7) || defined(WOLFSSL_STM32U5)) #define STM32_HAL_V2 #endif