From 9044f709c11ba20a679894a3cd6a34530ebf63cc Mon Sep 17 00:00:00 2001 From: David Garske Date: Tue, 19 Jan 2021 13:54:53 -0800 Subject: [PATCH] Add support for `STM32_AESGCM_PARTIAL` build option to speedup platforms that allow AAD header sizes that are not a multiple of 4 bytes. ZD 11364. --- IDE/STM32Cube/README.md | 31 ++++++++++++++++ IDE/STM32Cube/default_conf.ftl | 1 + wolfcrypt/src/aes.c | 66 +++++++++++++++++++++++++--------- 3 files changed, 81 insertions(+), 17 deletions(-) diff --git a/IDE/STM32Cube/README.md b/IDE/STM32Cube/README.md index 4f75f04ce..1109d5f45 100644 --- a/IDE/STM32Cube/README.md +++ b/IDE/STM32Cube/README.md @@ -42,6 +42,37 @@ To enable the latest Cube HAL support please define `STM32_HAL_V2`. If you'd like to use the older Standard Peripheral library undefine `WOLFSSL_STM32_CUBEMX`. +With STM32 Cube HAL v2 some AES GCM hardware has a limitation for the AAD header, which must be a multiple of 4 bytes. + +If using `STM32_AESGCM_PARTIAL` with the following patch it will enable use for all AAD header sizes. The `STM32Cube_FW_F7_V1.16.0` patch is: + +``` +diff --git a/Drivers/STM32F7xx_HAL_Driver/Inc/stm32f7xx_hal_cryp.h b/Drivers/STM32F7xx_HAL_Driver/Inc/stm32f7xx_hal_cryp.h +--- a/Drivers/STM32F7xx_HAL_Driver/Inc/stm32f7xx_hal_cryp.h ++++ b/Drivers/STM32F7xx_HAL_Driver/Inc/stm32f7xx_hal_cryp.h +@@ -63,6 +63,7 @@ typedef struct + GCM : also known as Additional Authentication Data + CCM : named B1 composed of the associated data length and Associated Data. 
*/ + uint32_t HeaderSize; /*!< The size of header buffer in word */ ++ uint32_t HeaderPadSize; /*!< The size of padding in bytes added to actual header data to pad it to a multiple of 32 bits */ + uint32_t *B0; /*!< B0 is first authentication block used only in AES CCM mode */ + uint32_t DataWidthUnit; /*!< Data With Unit, this parameter can be value of @ref CRYP_Data_Width_Unit*/ + uint32_t KeyIVConfigSkip; /*!< CRYP peripheral Key and IV configuration skip, to config Key and Initialization + +diff --git a/Drivers/STM32F7xx_HAL_Driver/Src/stm32f7xx_hal_cryp_ex.c b/Drivers/STM32F7xx_HAL_Driver/Src/stm32f7xx_hal_cryp_ex.c +--- a/Drivers/STM32F7xx_HAL_Driver/Src/stm32f7xx_hal_cryp_ex.c ++++ b/Drivers/STM32F7xx_HAL_Driver/Src/stm32f7xx_hal_cryp_ex.c +@@ -132,6 +132,8 @@ HAL_StatusTypeDef HAL_CRYPEx_AESGCM_GenerateAuthTAG(CRYP_HandleTypeDef *hcryp, u + uint64_t inputlength = (uint64_t)hcryp->SizesSum * 8U; /* input length in bits */ + uint32_t tagaddr = (uint32_t)AuthTag; + ++ headerlength -= ((uint64_t)(hcryp->Init.HeaderPadSize) * 8U); /* Decrement the header size removing the pad size */ ++ + if (hcryp->State == HAL_CRYP_STATE_READY) + { + /* Process locked */ +``` + If you are using FreeRTOS make sure your `FreeRTOSConfig.h` has its `configTOTAL_HEAP_SIZE` increased. The TLS client/server benchmark example requires about 76 KB for allocated tasks (with stack) and peak heap. This uses both a TLS client and server to test a TLS connection locally for each enabled TLS cipher suite. 
diff --git a/IDE/STM32Cube/default_conf.ftl b/IDE/STM32Cube/default_conf.ftl index 3b7cfcadc..3f67578e5 100644 --- a/IDE/STM32Cube/default_conf.ftl +++ b/IDE/STM32Cube/default_conf.ftl @@ -86,6 +86,7 @@ extern ${variable.value} ${variable.name}; #undef NO_STM32_CRYPTO #define STM32_HAL_V2 #define HAL_CONSOLE_UART huart2 + #define STM32_AESGCM_PARTIAL /* allow partial blocks and add auth info (header) */ #elif defined(STM32H753xx) #define WOLFSSL_STM32H7 #undef NO_STM32_HASH diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 805d777d6..3f24dfa52 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -6430,6 +6430,9 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, #ifdef STM32_CRYPTO_AES_GCM /* this function supports inline encrypt */ +/* define STM32_AESGCM_PARTIAL for newer STM Cube HAL's with workaround + for handling partial packets to improve auth tag calculation performance by + using hardware */ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, byte* authTag, word32 authTagSz, @@ -6455,7 +6458,7 @@ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz word32 ctr[AES_BLOCK_SIZE/sizeof(word32)]; word32 authhdr[AES_BLOCK_SIZE/sizeof(word32)]; byte* authInPadded = NULL; - int authPadSz, wasAlloc = 0; + int authPadSz, wasAlloc = 0, useSwGhash = 0; ret = wc_AesGetKeySize(aes, &keySize); if (ret != 0) @@ -6501,6 +6504,17 @@ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz authInPadded = (byte*)authIn; } + /* for cases where hardware cannot be used for authTag calculate it */ + /* if IV is not 12 calculate GHASH using software */ + if (ivSz != GCM_NONCE_MID_SZ +#ifndef STM32_AESGCM_PARTIAL + /* or authIn is not a multiple of 4 */ + || authPadSz != authInSz || sz == 0 || partial != 0 +#endif + ) { + useSwGhash = 1; + } + /* Hardware requires counter + 1 */ IncrementGcmCounter((byte*)ctr); @@ -6513,16 +6527,19 @@ 
static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz hcryp.Init.Header = (STM_CRYPT_TYPE*)authInPadded; #if defined(STM32_HAL_V2) - hcryp.Init.Algorithm = CRYP_AES_GCM; + hcryp.Init.Algorithm = CRYP_AES_GCM; hcryp.Init.HeaderSize = authPadSz/sizeof(word32); + #ifdef STM32_AESGCM_PARTIAL + hcryp.Init.HeaderPadSize = authPadSz - authInSz; + #endif ByteReverseWords(partialBlock, ctr, AES_BLOCK_SIZE); hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)partialBlock; HAL_CRYP_Init(&hcryp); /* GCM payload phase - can handle partial blocks */ - if (status == HAL_OK) { status = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in, (blocks * AES_BLOCK_SIZE) + partial, (uint32_t*)out, STM32_HAL_TIMEOUT); + if (status == HAL_OK && !useSwGhash) { /* Compute the authTag */ status = HAL_CRYPEx_AESGCM_GenerateAuthTAG(&hcryp, (uint32_t*)tag, STM32_HAL_TIMEOUT); @@ -6560,7 +6577,7 @@ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz (uint8_t*)partialBlock, STM32_HAL_TIMEOUT); XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial); } - if (status == HAL_OK) { + if (status == HAL_OK && !useSwGhash) { /* GCM final phase */ hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE; status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, (uint8_t*)tag, STM32_HAL_TIMEOUT); @@ -6581,7 +6598,7 @@ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz (uint8_t*)partialBlock, STM32_HAL_TIMEOUT); XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial); } - if (status == HAL_OK) { + if (status == HAL_OK && !useSwGhash) { /* Compute the authTag */ status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, (uint8_t*)tag, STM32_HAL_TIMEOUT); } @@ -6606,13 +6623,13 @@ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz if (ret == 0) { /* return authTag */ if (authTag) { - /* For STM32 GCM fallback to software if partial AES block or IV != 12 */ - if (sz == 0 || partial != 0 || ivSz != GCM_NONCE_MID_SZ) { + if (useSwGhash) { 
GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz); wc_AesEncrypt(aes, (byte*)ctrInit, (byte*)tag); xorbuf(authTag, tag, authTagSz); } else { + /* use hardware calculated tag */ XMEMCPY(authTag, tag, authTagSz); } } @@ -6939,18 +6956,31 @@ static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out, * For TLS blocks the authTag is after the output buffer, so save it */ XMEMCPY(tagExpected, authTag, authTagSz); + /* Authentication buffer - must be 4-byte multiple zero padded */ + authPadSz = authInSz % sizeof(word32); + if (authPadSz != 0) { + authPadSz = authInSz + sizeof(word32) - authPadSz; + } + else { + authPadSz = authInSz; + } + /* for cases where hardware cannot be used for authTag calculate it */ - if (sz == 0 || partial != 0 || ivSz != GCM_NONCE_MID_SZ) { + /* if IV is not 12 calculate GHASH using software */ + if (ivSz != GCM_NONCE_MID_SZ || sz == 0 || partial != 0 +#ifndef STM32_AESGCM_PARTIAL + /* or authIn is not a multiple of 4 */ + || authPadSz != authInSz +#endif + ) { GHASH(aes, authIn, authInSz, in, sz, (byte*)tag, sizeof(tag)); wc_AesEncrypt(aes, (byte*)ctr, (byte*)partialBlock); xorbuf(tag, partialBlock, sizeof(tag)); tagComputed = 1; } - /* Authentication buffer - must be 4-byte multiple zero padded */ - authPadSz = authInSz % sizeof(word32); - if (authPadSz != 0) { - authPadSz = authInSz + sizeof(word32) - authPadSz; + /* if using hardware for authentication tag make sure it's aligned and zero padded */ + if (authPadSz != authInSz && !tagComputed) { if (authPadSz <= sizeof(authhdr)) { authInPadded = (byte*)authhdr; } @@ -6966,7 +6996,6 @@ static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out, XMEMSET(authInPadded, 0, authPadSz); XMEMCPY(authInPadded, authIn, authInSz); } else { - authPadSz = authInSz; authInPadded = (byte*)authIn; } @@ -6982,18 +7011,21 @@ static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out, hcryp.Init.Header = (STM_CRYPT_TYPE*)authInPadded; #if defined(STM32_HAL_V2) - hcryp.Init.HeaderSize = authPadSz/sizeof(word32);
hcryp.Init.Algorithm = CRYP_AES_GCM; + hcryp.Init.HeaderSize = authPadSz/sizeof(word32); + #ifdef STM32_AESGCM_PARTIAL + hcryp.Init.HeaderPadSize = authPadSz - authInSz; + #endif ByteReverseWords(partialBlock, ctr, AES_BLOCK_SIZE); hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)partialBlock; HAL_CRYP_Init(&hcryp); /* GCM payload phase - can handle partial blocks */ - status = HAL_CRYP_Decrypt(&hcryp, (word32*)in, - (blocks * AES_BLOCK_SIZE) + partial, (word32*)out, STM32_HAL_TIMEOUT); + status = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)in, + (blocks * AES_BLOCK_SIZE) + partial, (uint32_t*)out, STM32_HAL_TIMEOUT); if (status == HAL_OK && tagComputed == 0) { /* Compute the authTag */ - status = HAL_CRYPEx_AESGCM_GenerateAuthTAG(&hcryp, (word32*)tag, + status = HAL_CRYPEx_AESGCM_GenerateAuthTAG(&hcryp, (uint32_t*)tag, STM32_HAL_TIMEOUT); } #elif defined(STM32_CRYPTO_AES_ONLY)