Merge pull request #2196 from dgarske/stm32_aes_gcm

Improvement for STM32 GCM performance
This commit is contained in:
toddouska
2019-04-09 14:31:27 -07:00
committed by GitHub

View File

@@ -3378,6 +3378,18 @@ static WC_INLINE void IncrementGcmCounter(byte* inOutCtr)
return; return;
} }
} }
#ifdef STM32_CRYPTO_AES_GCM
/* Undo one IncrementGcmCounter() step on a GCM counter block.
 * The counter occupies the trailing CTR_SZ bytes of the block in
 * network (big-endian) byte order, so borrow propagation starts at the
 * least-significant byte (the end of the block) and walks backward. */
static WC_INLINE void DecrementGcmCounter(byte* inOutCtr)
{
    int idx;

    for (idx = AES_BLOCK_SIZE - 1; idx >= AES_BLOCK_SIZE - CTR_SZ; idx--) {
        inOutCtr[idx]--;
        if (inOutCtr[idx] != 0xFF) {
            /* No borrow out of this byte: decrement is complete. */
            break;
        }
        /* Byte wrapped 0x00 -> 0xFF: carry the borrow into the next byte. */
    }
}
#endif /* STM32_CRYPTO_AES_GCM */
#endif /* !FREESCALE_LTC_AES_GCM */ #endif /* !FREESCALE_LTC_AES_GCM */
#if defined(GCM_SMALL) || defined(GCM_TABLE) #if defined(GCM_SMALL) || defined(GCM_TABLE)
@@ -5196,8 +5208,9 @@ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz
int status = 0; int status = 0;
word32 blocks = sz / AES_BLOCK_SIZE; word32 blocks = sz / AES_BLOCK_SIZE;
word32 partial = sz % AES_BLOCK_SIZE; word32 partial = sz % AES_BLOCK_SIZE;
word32 tag[AES_BLOCK_SIZE/sizeof(word32)]; byte tag[AES_BLOCK_SIZE];
word32 partialBlock[AES_BLOCK_SIZE/sizeof(word32)]; byte partialBlock[AES_BLOCK_SIZE];
byte ctr[AES_BLOCK_SIZE];
byte* authInPadded = NULL; byte* authInPadded = NULL;
int authPadSz; int authPadSz;
@@ -5211,10 +5224,16 @@ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz
return ret; return ret;
#endif #endif
XMEMCPY(aes->reg, iv, ivSz); XMEMSET(ctr, 0, AES_BLOCK_SIZE);
if (ivSz == GCM_NONCE_MID_SZ) { if (ivSz == GCM_NONCE_MID_SZ) {
*((byte*)aes->reg + (AES_BLOCK_SIZE - 1)) = STM32_GCM_IV_START; XMEMCPY(ctr, iv, ivSz);
ctr[AES_BLOCK_SIZE - 1] = 1;
} }
else {
GHASH(aes, NULL, 0, iv, ivSz, ctr, AES_BLOCK_SIZE);
}
/* Hardware requires counter + 1 */
IncrementGcmCounter(ctr);
if (authInSz == 0 || (authInSz % AES_BLOCK_SIZE) != 0) { if (authInSz == 0 || (authInSz % AES_BLOCK_SIZE) != 0) {
/* Need to pad the AAD to a full block with zeros. */ /* Need to pad the AAD to a full block with zeros. */
@@ -5232,7 +5251,7 @@ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz
} }
#ifdef WOLFSSL_STM32_CUBEMX #ifdef WOLFSSL_STM32_CUBEMX
hcryp.Init.pInitVect = (uint8_t*)aes->reg; hcryp.Init.pInitVect = (uint8_t*)ctr;
hcryp.Init.Header = authInPadded; hcryp.Init.Header = authInPadded;
hcryp.Init.HeaderSize = authInSz; hcryp.Init.HeaderSize = authInSz;
@@ -5260,15 +5279,14 @@ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz
/* GCM payload phase - partial remainder */ /* GCM payload phase - partial remainder */
XMEMSET(partialBlock, 0, sizeof(partialBlock)); XMEMSET(partialBlock, 0, sizeof(partialBlock));
XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial); XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)partialBlock, partial, status = HAL_CRYPEx_AES_Auth(&hcryp, partialBlock, partial,
(byte*)partialBlock, STM32_HAL_TIMEOUT); partialBlock, STM32_HAL_TIMEOUT);
XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial); XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
} }
if (status == HAL_OK) { if (status == HAL_OK) {
/* GCM final phase */ /* GCM final phase */
hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE; hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE;
status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, (byte*)tag, status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT);
STM32_HAL_TIMEOUT);
} }
#else #else
HAL_CRYP_Init(&hcryp); HAL_CRYP_Init(&hcryp);
@@ -5279,14 +5297,13 @@ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz
/* GCM payload phase - partial remainder */ /* GCM payload phase - partial remainder */
XMEMSET(partialBlock, 0, sizeof(partialBlock)); XMEMSET(partialBlock, 0, sizeof(partialBlock));
XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial); XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, (byte*)partialBlock, partial, status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, partialBlock, partial,
(byte*)partialBlock, STM32_HAL_TIMEOUT); partialBlock, STM32_HAL_TIMEOUT);
XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial); XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
} }
if (status == HAL_OK) { if (status == HAL_OK) {
/* Compute the authTag */ /* Compute the authTag */
status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, (byte*)tag, status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT);
STM32_HAL_TIMEOUT);
} }
#endif #endif
@@ -5296,11 +5313,11 @@ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz
#else /* STD_PERI_LIB */ #else /* STD_PERI_LIB */
ByteReverseWords(keyCopy, (word32*)aes->key, keySize); ByteReverseWords(keyCopy, (word32*)aes->key, keySize);
status = CRYP_AES_GCM(MODE_ENCRYPT, (uint8_t*)aes->reg, status = CRYP_AES_GCM(MODE_ENCRYPT, (uint8_t*)ctr,
(uint8_t*)keyCopy, keySize * 8, (uint8_t*)keyCopy, keySize * 8,
(uint8_t*)in, sz, (uint8_t*)in, sz,
(uint8_t*)authInPadded,authInSz, (uint8_t*)authInPadded, authInSz,
(uint8_t*)out, (byte*)tag); (uint8_t*)out, tag);
if (status != SUCCESS) if (status != SUCCESS)
ret = AES_GCM_AUTH_E; ret = AES_GCM_AUTH_E;
#endif /* WOLFSSL_STM32_CUBEMX */ #endif /* WOLFSSL_STM32_CUBEMX */
@@ -5308,21 +5325,13 @@ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz
if (ret == 0) { if (ret == 0) {
/* return authTag */ /* return authTag */
if (authTag) { if (authTag) {
/* for partial blocks tag hardware will not compute tag correctly, /* STM32 GCM won't compute Auth correctly for partial or
so use software to compute tag */ when IV != 12, so use software here */
if (partial != 0) { if (partial != 0 || ivSz != GCM_NONCE_MID_SZ) {
byte initialCounter[AES_BLOCK_SIZE]; DecrementGcmCounter(ctr); /* hardware requires +1, so subtract it */
XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
if (ivSz == GCM_NONCE_MID_SZ) {
XMEMCPY(initialCounter, iv, ivSz);
initialCounter[AES_BLOCK_SIZE - 1] = 1;
}
else {
GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
}
GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz); GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz);
wc_AesEncrypt(aes, initialCounter, (byte*)tag); wc_AesEncrypt(aes, ctr, tag);
xorbuf(authTag, (byte*)tag, authTagSz); xorbuf(authTag, tag, authTagSz);
} }
else { else {
XMEMCPY(authTag, tag, authTagSz); XMEMCPY(authTag, tag, authTagSz);
@@ -5498,13 +5507,11 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
#endif /* WOLFSSL_ASYNC_CRYPT */ #endif /* WOLFSSL_ASYNC_CRYPT */
#ifdef STM32_CRYPTO_AES_GCM #ifdef STM32_CRYPTO_AES_GCM
/* STM hardware only supports IV of 12 thru 16 bytes */
/* The STM standard peripheral library API's doesn't support partial blocks */ /* The STM standard peripheral library API's doesn't support partial blocks */
if (ivSz >= GCM_NONCE_MID_SZ && ivSz <= GCM_NONCE_MAX_SZ
#ifdef STD_PERI_LIB #ifdef STD_PERI_LIB
&& partial == 0 if (partial == 0)
#endif #endif
) { {
return wc_AesGcmEncrypt_STM32( return wc_AesGcmEncrypt_STM32(
aes, out, in, sz, iv, ivSz, aes, out, in, sz, iv, ivSz,
authTag, authTagSz, authIn, authInSz); authTag, authTagSz, authIn, authInSz);
@@ -5596,8 +5603,9 @@ static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out,
int status; int status;
word32 blocks = sz / AES_BLOCK_SIZE; word32 blocks = sz / AES_BLOCK_SIZE;
word32 partial = sz % AES_BLOCK_SIZE; word32 partial = sz % AES_BLOCK_SIZE;
word32 tag[AES_BLOCK_SIZE/sizeof(word32)]; byte tag[AES_BLOCK_SIZE];
word32 partialBlock[AES_BLOCK_SIZE/sizeof(word32)]; byte partialBlock[AES_BLOCK_SIZE];
byte ctr[AES_BLOCK_SIZE];
byte* authInPadded = NULL; byte* authInPadded = NULL;
int authPadSz; int authPadSz;
@@ -5611,34 +5619,16 @@ static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out,
return ret; return ret;
#endif #endif
/* if sz is not multiple of block size then hardware computed tag XMEMSET(ctr, 0, AES_BLOCK_SIZE);
will be incorrect, so use software */
if (partial != 0) {
byte initialCounter[AES_BLOCK_SIZE];
byte Tprime[AES_BLOCK_SIZE];
byte EKY0[AES_BLOCK_SIZE];
XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
if (ivSz == GCM_NONCE_MID_SZ) {
XMEMCPY(initialCounter, iv, ivSz);
initialCounter[AES_BLOCK_SIZE - 1] = 1;
}
else {
GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
}
GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime));
wc_AesEncrypt(aes, initialCounter, EKY0);
xorbuf(Tprime, EKY0, sizeof(Tprime));
if (ConstantCompare(authTag, Tprime, authTagSz) != 0) {
return AES_GCM_AUTH_E;
}
}
XMEMCPY(aes->reg, iv, ivSz);
if (ivSz == GCM_NONCE_MID_SZ) { if (ivSz == GCM_NONCE_MID_SZ) {
*((byte*)aes->reg + (AES_BLOCK_SIZE - 1)) = STM32_GCM_IV_START; XMEMCPY(ctr, iv, ivSz);
ctr[AES_BLOCK_SIZE - 1] = 1;
} }
else {
GHASH(aes, NULL, 0, iv, ivSz, ctr, AES_BLOCK_SIZE);
}
/* Hardware requires counter + 1 */
IncrementGcmCounter(ctr);
if (authInSz == 0 || (authInSz % AES_BLOCK_SIZE) != 0) { if (authInSz == 0 || (authInSz % AES_BLOCK_SIZE) != 0) {
/* Need to pad the AAD to a full block with zeros. */ /* Need to pad the AAD to a full block with zeros. */
@@ -5656,7 +5646,7 @@ static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out,
} }
#ifdef WOLFSSL_STM32_CUBEMX #ifdef WOLFSSL_STM32_CUBEMX
hcryp.Init.pInitVect = (uint8_t*)aes->reg; hcryp.Init.pInitVect = (uint8_t*)ctr;
hcryp.Init.Header = authInPadded; hcryp.Init.Header = authInPadded;
hcryp.Init.HeaderSize = authInSz; hcryp.Init.HeaderSize = authInSz;
@@ -5684,15 +5674,14 @@ static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out,
/* GCM payload phase - partial remainder */ /* GCM payload phase - partial remainder */
XMEMSET(partialBlock, 0, sizeof(partialBlock)); XMEMSET(partialBlock, 0, sizeof(partialBlock));
XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial); XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)partialBlock, partial, status = HAL_CRYPEx_AES_Auth(&hcryp, partialBlock, partial,
(byte*)partialBlock, STM32_HAL_TIMEOUT); partialBlock, STM32_HAL_TIMEOUT);
XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial); XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
} }
if (status == HAL_OK) { if (status == HAL_OK) {
/* GCM final phase */ /* GCM final phase */
hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE; hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE;
status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, (byte*)tag, status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT);
STM32_HAL_TIMEOUT);
} }
#else #else
HAL_CRYP_Init(&hcryp); HAL_CRYP_Init(&hcryp);
@@ -5703,14 +5692,13 @@ static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out,
/* GCM payload phase - partial remainder */ /* GCM payload phase - partial remainder */
XMEMSET(partialBlock, 0, sizeof(partialBlock)); XMEMSET(partialBlock, 0, sizeof(partialBlock));
XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial); XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, (byte*)partialBlock, partial, status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, partialBlock, partial,
(byte*)partialBlock, STM32_HAL_TIMEOUT); partialBlock, STM32_HAL_TIMEOUT);
XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial); XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
} }
if (status == HAL_OK) { if (status == HAL_OK) {
/* Compute the authTag */ /* Compute the authTag */
status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, (byte*)tag, status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT);
STM32_HAL_TIMEOUT);
} }
#endif #endif
@@ -5725,17 +5713,24 @@ static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out,
/* Input size and auth size need to be the actual sizes, even though /* Input size and auth size need to be the actual sizes, even though
* they are not block aligned, because this length (in bits) is used * they are not block aligned, because this length (in bits) is used
* in the final GHASH. */ * in the final GHASH. */
status = CRYP_AES_GCM(MODE_DECRYPT, (uint8_t*)aes->reg, status = CRYP_AES_GCM(MODE_DECRYPT, (uint8_t*)ctr,
(uint8_t*)keyCopy, keySize * 8, (uint8_t*)keyCopy, keySize * 8,
(uint8_t*)in, sz, (uint8_t*)in, sz,
(uint8_t*)authInPadded,authInSz, (uint8_t*)authInPadded, authInSz,
(uint8_t*)out, (byte*)tag); (uint8_t*)out, tag);
if (status != SUCCESS) if (status != SUCCESS)
ret = AES_GCM_AUTH_E; ret = AES_GCM_AUTH_E;
#endif /* WOLFSSL_STM32_CUBEMX */ #endif /* WOLFSSL_STM32_CUBEMX */
/* if authTag was not already validated check it */ /* STM32 GCM hardware only supports IV of 12 bytes, so use software for auth */
if (partial == 0 && ConstantCompare(authTag, (byte*)tag, authTagSz) != 0) { if (ivSz != GCM_NONCE_MID_SZ) {
DecrementGcmCounter(ctr); /* hardware requires +1, so subtract it */
GHASH(aes, authIn, authInSz, in, sz, tag, sizeof(tag));
wc_AesEncrypt(aes, ctr, partialBlock);
xorbuf(tag, partialBlock, sizeof(tag));
}
if (ConstantCompare(authTag, tag, authTagSz) != 0) {
ret = AES_GCM_AUTH_E; ret = AES_GCM_AUTH_E;
} }
@@ -5914,13 +5909,11 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
#endif /* WOLFSSL_ASYNC_CRYPT */ #endif /* WOLFSSL_ASYNC_CRYPT */
#ifdef STM32_CRYPTO_AES_GCM #ifdef STM32_CRYPTO_AES_GCM
/* STM hardware only supports IV of 12 thru 16 bytes */
/* The STM standard peripheral library API's doesn't support partial blocks */ /* The STM standard peripheral library API's doesn't support partial blocks */
if (ivSz >= GCM_NONCE_MID_SZ && ivSz <= GCM_NONCE_MAX_SZ
#ifdef STD_PERI_LIB #ifdef STD_PERI_LIB
&& partial == 0 if (partial == 0)
#endif #endif
) { {
return wc_AesGcmDecrypt_STM32( return wc_AesGcmDecrypt_STM32(
aes, out, in, sz, iv, ivSz, aes, out, in, sz, iv, ivSz,
authTag, authTagSz, authIn, authInSz); authTag, authTagSz, authIn, authInSz);