Merge pull request #9797 from douzzer/20260218-aes-prefetch-optimize

20260218-aes-prefetch-optimize
This commit is contained in:
Sean Parkinson
2026-02-26 08:27:08 +10:00
committed by GitHub
10 changed files with 309 additions and 91 deletions
+1
View File
@@ -1094,6 +1094,7 @@ __clang_major__
__cplusplus
__ghc__
__ghs__
__has_attribute
__hpux__
__i386
__i386__
+251 -47
View File
@@ -70,9 +70,9 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
#include <wolfssl/wolfcrypt/port/nxp/se050_port.h>
#endif
#if defined(WOLFSSL_AES_SIV)
#ifdef WOLFSSL_CMAC
#include <wolfssl/wolfcrypt/cmac.h>
#endif /* WOLFSSL_AES_SIV */
#endif
#if defined(WOLFSSL_HAVE_PSA) && !defined(WOLFSSL_PSA_NO_AES)
#include <wolfssl/wolfcrypt/port/psa/psa.h>
@@ -2142,8 +2142,12 @@ static word32 GetTable8_4(const byte* t, byte o0, byte o1, byte o2, byte o3)
* @param [out] outBlock Encrypted block.
* @param [in] r Rounds divided by 2.
*/
#define WC_AES_HAVE_PREFETCH_ARG
static int always_prefetch = 0;
WC_MAYBE_UNUSED static int never_prefetch = 1;
WC_ARGS_NOT_NULL((1, 2, 3, 5))
static void AesEncrypt_C(Aes* aes, const byte* inBlock, byte* outBlock,
word32 r)
word32 r, int *prefetch_ptr)
{
word32 s0 = 0, s1 = 0, s2 = 0, s3 = 0;
word32 t0 = 0, t1 = 0, t2 = 0, t3 = 0;
@@ -2178,8 +2182,15 @@ static void AesEncrypt_C(Aes* aes, const byte* inBlock, byte* outBlock,
s3 ^= rk[3];
#ifndef WOLFSSL_AES_SMALL_TABLES
#ifndef WC_NO_CACHE_RESISTANT
s0 |= PreFetchTe();
if (*prefetch_ptr == 0) {
s0 |= PreFetchTe();
if (prefetch_ptr != &always_prefetch)
*prefetch_ptr = 1;
}
#else
(void)prefetch_ptr;
#endif
#ifndef WOLFSSL_AES_TOUCH_LINES
@@ -2320,9 +2331,17 @@ static void AesEncrypt_C(Aes* aes, const byte* inBlock, byte* outBlock,
s2 ^= u2 & 0x000000ff; s3 ^= u3 & 0x000000ff;
}
#endif
#else
#else /* WOLFSSL_AES_SMALL_TABLES */
#ifndef WC_NO_CACHE_RESISTANT
s0 |= PreFetchSBox();
if (*prefetch_ptr == 0) {
s0 |= PreFetchSBox();
if (prefetch_ptr != &always_prefetch)
*prefetch_ptr = 1;
}
#else
(void)prefetch_ptr;
#endif
r *= 2;
@@ -2399,7 +2418,8 @@ static void AesEncrypt_C(Aes* aes, const byte* inBlock, byte* outBlock,
s1 = t1 ^ rk[1];
s2 = t2 ^ rk[2];
s3 = t3 ^ rk[3];
#endif
#endif /* WOLFSSL_AES_SMALL_TABLES */
/* write out */
#ifdef LITTLE_ENDIAN_ORDER
@@ -2429,9 +2449,10 @@ static void AesEncrypt_C(Aes* aes, const byte* inBlock, byte* outBlock,
static void AesEncryptBlocks_C(Aes* aes, const byte* in, byte* out, word32 sz)
{
word32 i;
int did_prefetches = 0;
for (i = 0; i < sz; i += WC_AES_BLOCK_SIZE) {
AesEncrypt_C(aes, in, out, aes->rounds >> 1);
AesEncrypt_C(aes, in, out, aes->rounds >> 1, &did_prefetches);
in += WC_AES_BLOCK_SIZE;
out += WC_AES_BLOCK_SIZE;
}
@@ -2998,10 +3019,18 @@ extern void AesEncryptBlocks_C(Aes* aes, const byte* in, byte* out, word32 sz);
#endif /* !WC_AES_BITSLICED */
/* this section disabled with NO_AES_192 */
/* calling this one when missing NO_AES_192 */
#ifdef WC_AES_HAVE_PREFETCH_ARG
#define wc_AesEncrypt(aes, inBlock, outBlock) \
AesEncrypt_preFetchOpt(aes, inBlock, outBlock, &always_prefetch)
WC_ALL_ARGS_NOT_NULL static WARN_UNUSED_RESULT int AesEncrypt_preFetchOpt(
Aes* aes, const byte* inBlock, byte* outBlock, int *prefetch_ptr)
#else
#define AesEncrypt_preFetchOpt(aes, inBlock, outBlock, prefetch_ptr) \
wc_AesEncrypt(aes, inBlock, outBlock)
static WARN_UNUSED_RESULT int wc_AesEncrypt(
Aes* aes, const byte* inBlock, byte* outBlock)
WC_ALL_ARGS_NOT_NULL
#endif
{
#if defined(MAX3266X_AES)
word32 keySize;
@@ -3153,7 +3182,11 @@ static WARN_UNUSED_RESULT int wc_AesEncrypt(
}
#endif
#ifdef WC_AES_HAVE_PREFETCH_ARG
AesEncrypt_C(aes, inBlock, outBlock, r, prefetch_ptr);
#else
AesEncrypt_C(aes, inBlock, outBlock, r);
#endif
return 0;
} /* wc_AesEncrypt */
@@ -3211,8 +3244,14 @@ static WARN_UNUSED_RESULT WC_INLINE word32 PreFetchTd4(void)
* @param [out] outBlock Encrypted block.
* @param [in] r Rounds divided by 2.
*/
#ifndef WC_AES_HAVE_PREFETCH_ARG
#define WC_AES_HAVE_PREFETCH_ARG
static int always_prefetch = 0;
WC_MAYBE_UNUSED static int never_prefetch = 1;
#endif
WC_ARGS_NOT_NULL((1, 2, 3, 5))
static void AesDecrypt_C(Aes* aes, const byte* inBlock, byte* outBlock,
word32 r)
word32 r, int *prefetch_ptr)
{
word32 s0 = 0, s1 = 0, s2 = 0, s3 = 0;
word32 t0 = 0, t1 = 0, t2 = 0, t3 = 0;
@@ -3246,8 +3285,16 @@ static void AesDecrypt_C(Aes* aes, const byte* inBlock, byte* outBlock,
s3 ^= rk[3];
#ifndef WOLFSSL_AES_SMALL_TABLES
#ifndef WC_NO_CACHE_RESISTANT
s0 |= PreFetchTd();
if (*prefetch_ptr == 0) {
s0 |= PreFetchTd();
/* don't set the prefetched flag here -- PreFetchTd4() is called
* below.
*/
}
#else
(void)prefetch_ptr;
#endif
#ifndef WOLFSSL_AES_TOUCH_LINES
@@ -3330,7 +3377,13 @@ static void AesDecrypt_C(Aes* aes, const byte* inBlock, byte* outBlock,
*/
#ifndef WC_NO_CACHE_RESISTANT
t0 |= PreFetchTd4();
if (*prefetch_ptr == 0) {
t0 |= PreFetchTd4();
if (prefetch_ptr != &always_prefetch)
*prefetch_ptr = 1;
}
#else
(void)prefetch_ptr;
#endif
s0 = GetTable8_4(Td4, GETBYTE(t0, 3), GETBYTE(t3, 2),
@@ -3341,9 +3394,17 @@ static void AesDecrypt_C(Aes* aes, const byte* inBlock, byte* outBlock,
GETBYTE(t0, 1), GETBYTE(t3, 0)) ^ rk[2];
s3 = GetTable8_4(Td4, GETBYTE(t3, 3), GETBYTE(t2, 2),
GETBYTE(t1, 1), GETBYTE(t0, 0)) ^ rk[3];
#else
#else /* WOLFSSL_AES_SMALL_TABLES */
#ifndef WC_NO_CACHE_RESISTANT
s0 |= PreFetchTd4();
if (*prefetch_ptr == 0) {
s0 |= PreFetchTd4();
if (prefetch_ptr != &always_prefetch)
*prefetch_ptr = 1;
}
#else
(void)prefetch_ptr;
#endif
r *= 2;
@@ -3419,7 +3480,8 @@ static void AesDecrypt_C(Aes* aes, const byte* inBlock, byte* outBlock,
s1 = t1 ^ rk[1];
s2 = t2 ^ rk[2];
s3 = t3 ^ rk[3];
#endif
#endif /* WOLFSSL_AES_SMALL_TABLES */
/* write out */
#ifdef LITTLE_ENDIAN_ORDER
@@ -3450,9 +3512,10 @@ static void AesDecrypt_C(Aes* aes, const byte* inBlock, byte* outBlock,
static void AesDecryptBlocks_C(Aes* aes, const byte* in, byte* out, word32 sz)
{
word32 i;
int did_prefetches = 0;
for (i = 0; i < sz; i += WC_AES_BLOCK_SIZE) {
AesDecrypt_C(aes, in, out, aes->rounds >> 1);
AesDecrypt_C(aes, in, out, aes->rounds >> 1, &did_prefetches);
in += WC_AES_BLOCK_SIZE;
out += WC_AES_BLOCK_SIZE;
}
@@ -3808,8 +3871,18 @@ static void AesDecryptBlocks_C(Aes* aes, const byte* in, byte* out, word32 sz)
#if defined(__aarch64__) || !defined(WOLFSSL_ARMASM)
#if !defined(WC_AES_BITSLICED) || defined(WOLFSSL_AES_DIRECT)
/* Software AES - ECB Decrypt */
static WARN_UNUSED_RESULT int wc_AesDecrypt(
#ifdef WC_AES_HAVE_PREFETCH_ARG
#define wc_AesDecrypt(aes, inBlock, outBlock) \
AesDecrypt_preFetchOpt(aes, inBlock, outBlock, &always_prefetch)
WC_ALL_ARGS_NOT_NULL static WARN_UNUSED_RESULT int AesDecrypt_preFetchOpt(
Aes* aes, const byte* inBlock, byte* outBlock, int *prefetch_ptr)
#else
#define AesDecrypt_preFetchOpt(aes, inBlock, outBlock, prefetch_ptr) \
wc_AesDecrypt(aes, inBlock, outBlock)
WC_ALL_ARGS_NOT_NULL static WARN_UNUSED_RESULT int wc_AesDecrypt(
Aes* aes, const byte* inBlock, byte* outBlock)
#endif
{
#if defined(MAX3266X_AES)
word32 keySize;
@@ -3935,7 +4008,11 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt(
}
#endif
#ifdef WC_AES_HAVE_PREFETCH_ARG
AesDecrypt_C(aes, inBlock, outBlock, r, prefetch_ptr);
#else
AesDecrypt_C(aes, inBlock, outBlock, r);
#endif
return 0;
} /* wc_AesDecrypt[_SW]() */
@@ -3946,7 +4023,16 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt(
#endif /* NEED_AES_TABLES */
#ifndef WC_AES_HAVE_PREFETCH_ARG
#ifndef AesEncrypt_preFetchOpt
#define AesEncrypt_preFetchOpt(aes, inBlock, outBlock, do_preFetch) \
wc_AesEncrypt(aes, inBlock, outBlock)
#endif
#ifndef AesDecrypt_preFetchOpt
#define AesDecrypt_preFetchOpt(aes, inBlock, outBlock, do_preFetch) \
wc_AesDecrypt(aes, inBlock, outBlock)
#endif
#endif
/* wc_AesSetKey */
#if defined(STM32_CRYPTO)
@@ -5335,6 +5421,7 @@ int wc_AesSetIV(Aes* aes, const byte* iv)
#else
/* Allow direct access to one block encrypt */
/* Note, the in and out args are swapped compared to wc_AesEncrypt(). */
int wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
{
int ret;
@@ -5355,6 +5442,7 @@ int wc_AesSetIV(Aes* aes, const byte* iv)
#ifdef HAVE_AES_DECRYPT
/* Allow direct access to one block decrypt */
/* Note, the in and out args are swapped compared to wc_AesDecrypt(). */
int wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
{
int ret;
@@ -6097,7 +6185,6 @@ int wc_AesSetIV(Aes* aes, const byte* iv)
offset += WC_AES_BLOCK_SIZE;
}
return 0;
}
#endif /* HAVE_AES_DECRYPT */
@@ -6471,10 +6558,15 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
else
#endif
{
#ifdef WC_AES_HAVE_PREFETCH_ARG
int did_prefetches = 0;
#endif
ret = 0;
while (blocks--) {
xorbuf((byte*)aes->reg, in, WC_AES_BLOCK_SIZE);
ret = wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->reg);
ret = AesEncrypt_preFetchOpt(aes, (byte*)aes->reg,
(byte*)aes->reg,
&did_prefetches);
if (ret != 0)
break;
XMEMCPY(out, aes->reg, WC_AES_BLOCK_SIZE);
@@ -6713,9 +6805,13 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
}
}
#else
#ifdef WC_AES_HAVE_PREFETCH_ARG
{
int did_prefetches = 0;
#endif
while (blocks--) {
XMEMCPY(aes->tmp, in, WC_AES_BLOCK_SIZE);
ret = wc_AesDecrypt(aes, in, out);
ret = AesDecrypt_preFetchOpt(aes, in, out, &did_prefetches);
if (ret != 0)
return ret;
xorbuf(out, (byte*)aes->reg, WC_AES_BLOCK_SIZE);
@@ -6725,6 +6821,9 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
out += WC_AES_BLOCK_SIZE;
in += WC_AES_BLOCK_SIZE;
}
#ifdef WC_AES_HAVE_PREFETCH_ARG
}
#endif
#endif
}
@@ -6967,6 +7066,9 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
int ret = 0;
#endif
word32 processed;
#ifdef WC_AES_HAVE_PREFETCH_ARG
int did_prefetches = 0;
#endif
#if !(!defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \
!defined(WOLFSSL_ARMASM_NO_HW_CRYPTO))
@@ -7118,7 +7220,9 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
#ifdef XTRANSFORM_AESCTRBLOCK
XTRANSFORM_AESCTRBLOCK(aes, out, in);
#else
ret = wc_AesEncrypt(aes, (byte*)aes->reg, scratch);
ret = AesEncrypt_preFetchOpt(aes, (byte*)aes->reg,
scratch,
&did_prefetches);
if (ret != 0)
break;
xorbuf(scratch, in, WC_AES_BLOCK_SIZE);
@@ -7136,7 +7240,9 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
/* handle non block size remaining and store unused byte count in left */
if ((ret == 0) && sz) {
ret = wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp);
ret = AesEncrypt_preFetchOpt(aes, (byte*)aes->reg,
(byte*)aes->tmp,
&did_prefetches);
if (ret == 0) {
IncrementAesCounter((byte*)aes->reg);
aes->left = WC_AES_BLOCK_SIZE - sz;
@@ -7175,6 +7281,16 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
#endif /* WOLFSSL_AES_COUNTER */
#endif /* !WOLFSSL_RISCV_ASM */
#ifndef WC_AES_HAVE_PREFETCH_ARG
#ifndef AesEncrypt_preFetchOpt
#define AesEncrypt_preFetchOpt(aes, inBlock, outBlock, do_preFetch) \
wc_AesEncrypt(aes, inBlock, outBlock)
#endif
#ifndef AesDecrypt_preFetchOpt
#define AesDecrypt_preFetchOpt(aes, inBlock, outBlock, do_preFetch) \
wc_AesDecrypt(aes, inBlock, outBlock)
#endif
#endif
/*
* The IV for AES GCM and CCM, stored in struct Aes's member reg, is comprised
@@ -9616,6 +9732,9 @@ WARN_UNUSED_RESULT int AES_GCM_encrypt_C(
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
ALIGN16 byte initialCounter[WC_AES_BLOCK_SIZE];
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
#ifdef WC_AES_HAVE_PREFETCH_ARG
int did_prefetches = 0;
#endif
if (ivSz == GCM_NONCE_MID_SZ) {
/* Counter is IV with bottom 4 bytes set to: 0x00,0x00,0x00,0x01. */
@@ -9674,7 +9793,8 @@ WARN_UNUSED_RESULT int AES_GCM_encrypt_C(
while (blocks--) {
IncrementGcmCounter(counter);
#if !defined(WOLFSSL_PIC32MZ_CRYPT)
ret = wc_AesEncrypt(aes, counter, scratch);
ret = AesEncrypt_preFetchOpt(aes, counter, scratch,
&did_prefetches);
if (ret != 0)
return ret;
xorbufout(c, scratch, p, WC_AES_BLOCK_SIZE);
@@ -9686,14 +9806,15 @@ WARN_UNUSED_RESULT int AES_GCM_encrypt_C(
if (partial != 0) {
IncrementGcmCounter(counter);
ret = wc_AesEncrypt(aes, counter, scratch);
ret = AesEncrypt_preFetchOpt(aes, counter, scratch, &did_prefetches);
if (ret != 0)
return ret;
xorbufout(c, scratch, p, partial);
}
if (authTag) {
GHASH(&aes->gcm, authIn, authInSz, out, sz, authTag, authTagSz);
ret = wc_AesEncrypt(aes, initialCounter, scratch);
ret = AesEncrypt_preFetchOpt(aes, initialCounter, scratch,
&did_prefetches);
if (ret != 0)
return ret;
xorbuf(authTag, scratch, authTagSz);
@@ -12814,7 +12935,11 @@ static WARN_UNUSED_RESULT int roll_x(
in += WC_AES_BLOCK_SIZE;
inSz -= WC_AES_BLOCK_SIZE;
ret = wc_AesEncrypt(aes, out, out);
/* wc_AesCcmEncrypt(), wc_AesCcmDecrypt(), and roll_auth() only call
* roll_x() after the AES cache lines are already hot -- no need to
* absorb additional prefetch overhead here.
*/
ret = AesEncrypt_preFetchOpt(aes, out, out, &never_prefetch);
if (ret != 0)
return ret;
}
@@ -12822,7 +12947,11 @@ static WARN_UNUSED_RESULT int roll_x(
/* process remainder of the data */
if (inSz > 0) {
xorbuf(out, in, inSz);
ret = wc_AesEncrypt(aes, out, out);
/* wc_AesCcmEncrypt(), wc_AesCcmDecrypt(), and roll_auth() only call
* roll_x() after the AES cache lines are already hot -- no need to
* absorb additional prefetch overhead here.
*/
ret = AesEncrypt_preFetchOpt(aes, out, out, &never_prefetch);
if (ret != 0)
return ret;
}
@@ -12870,7 +12999,11 @@ static WARN_UNUSED_RESULT int roll_auth(
xorbuf(out + authLenSz, in, inSz);
inSz = 0;
}
ret = wc_AesEncrypt(aes, out, out);
/* wc_AesCcmEncrypt() and wc_AesCcmDecrypt() only call roll_auth() after the
* AES cache lines are already hot -- no need to absorb additional prefetch
* overhead here.
*/
ret = AesEncrypt_preFetchOpt(aes, out, out, &never_prefetch);
if ((ret == 0) && (inSz > 0)) {
ret = roll_x(aes, in, inSz, out);
@@ -12996,6 +13129,9 @@ int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
#endif
VECTOR_REGISTERS_PUSH;
/* note this wc_AesEncrypt() will perform cache prefetches if needed, so
* that the later encrypt ops don't need to.
*/
ret = wc_AesEncrypt(aes, B, A);
#ifdef WOLFSSL_CHECK_MEM_ZERO
if (ret == 0)
@@ -13014,7 +13150,7 @@ int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
B[0] = (byte)(lenSz - 1U);
for (i = 0; i < lenSz; i++)
B[WC_AES_BLOCK_SIZE - 1 - i] = 0;
ret = wc_AesEncrypt(aes, B, A);
ret = AesEncrypt_preFetchOpt(aes, B, A, &never_prefetch);
}
if (ret == 0) {
@@ -13042,7 +13178,7 @@ int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
#endif
if (ret == 0) {
while (inSz >= WC_AES_BLOCK_SIZE) {
ret = wc_AesEncrypt(aes, B, A);
ret = AesEncrypt_preFetchOpt(aes, B, A, &never_prefetch);
if (ret != 0)
break;
xorbuf(A, in, WC_AES_BLOCK_SIZE);
@@ -13055,7 +13191,7 @@ int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
}
}
if ((ret == 0) && (inSz > 0)) {
ret = wc_AesEncrypt(aes, B, A);
ret = AesEncrypt_preFetchOpt(aes, B, A, &never_prefetch);
}
if ((ret == 0) && (inSz > 0)) {
xorbuf(A, in, inSz);
@@ -13095,6 +13231,9 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
byte mask = 0xFF;
const word32 wordSz = (word32)sizeof(word32);
int ret = 0;
#ifdef WC_AES_HAVE_PREFETCH_ARG
int did_prefetches = 0;
#endif
/* sanity check on arguments */
if (aes == NULL || (inSz != 0 && (in == NULL || out == NULL)) ||
@@ -13165,7 +13304,7 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
#endif
while (oSz >= WC_AES_BLOCK_SIZE) {
ret = wc_AesEncrypt(aes, B, A);
ret = AesEncrypt_preFetchOpt(aes, B, A, &did_prefetches);
if (ret != 0)
break;
xorbuf(A, in, WC_AES_BLOCK_SIZE);
@@ -13177,14 +13316,14 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
}
if ((ret == 0) && (inSz > 0))
ret = wc_AesEncrypt(aes, B, A);
ret = AesEncrypt_preFetchOpt(aes, B, A, &did_prefetches);
if ((ret == 0) && (inSz > 0)) {
xorbuf(A, in, oSz);
XMEMCPY(o, A, oSz);
for (i = 0; i < lenSz; i++)
B[WC_AES_BLOCK_SIZE - 1 - i] = 0;
ret = wc_AesEncrypt(aes, B, A);
ret = AesEncrypt_preFetchOpt(aes, B, A, &did_prefetches);
}
if (ret == 0) {
@@ -13200,7 +13339,7 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
B[WC_AES_BLOCK_SIZE - 1 - i] = (byte)((inSz >> ((8 * i) & mask)) & mask);
}
ret = wc_AesEncrypt(aes, B, A);
ret = AesEncrypt_preFetchOpt(aes, B, A, &did_prefetches);
}
if (ret == 0) {
@@ -13214,7 +13353,7 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
B[0] = (byte)(lenSz - 1U);
for (i = 0; i < lenSz; i++)
B[WC_AES_BLOCK_SIZE - 1 - i] = 0;
ret = wc_AesEncrypt(aes, B, B);
ret = AesEncrypt_preFetchOpt(aes, B, B, &did_prefetches);
}
if (ret == 0)
@@ -13772,9 +13911,12 @@ static WARN_UNUSED_RESULT int _AesEcbEncrypt(
AesEncryptBlocks_C(aes, in, out, sz);
#else
word32 i;
#ifdef WC_AES_HAVE_PREFETCH_ARG
int did_prefetches = 0;
#endif
for (i = 0; i < sz; i += WC_AES_BLOCK_SIZE) {
ret = wc_AesEncryptDirect(aes, out, in);
ret = AesEncrypt_preFetchOpt(aes, in, out, &did_prefetches);
if (ret != 0)
break;
in += WC_AES_BLOCK_SIZE;
@@ -13936,6 +14078,9 @@ static WARN_UNUSED_RESULT int AesCfbEncrypt_C(Aes* aes, byte* out,
{
int ret = 0;
word32 processed;
#ifdef WC_AES_HAVE_PREFETCH_ARG
int did_prefetches = 0;
#endif
if ((aes == NULL) || (out == NULL) || (in == NULL)) {
return BAD_FUNC_ARG;
@@ -13960,7 +14105,8 @@ static WARN_UNUSED_RESULT int AesCfbEncrypt_C(Aes* aes, byte* out,
VECTOR_REGISTERS_PUSH;
while (sz >= WC_AES_BLOCK_SIZE) {
ret = wc_AesEncryptDirect(aes, (byte*)aes->reg, (byte*)aes->reg);
ret = AesEncrypt_preFetchOpt(aes, (byte*)aes->reg, (byte*)aes->reg,
&did_prefetches);
if (ret != 0) {
break;
}
@@ -13973,7 +14119,8 @@ static WARN_UNUSED_RESULT int AesCfbEncrypt_C(Aes* aes, byte* out,
/* encrypt left over data */
if ((ret == 0) && sz) {
ret = wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
ret = AesEncrypt_preFetchOpt(aes, (byte*)aes->reg, (byte*)aes->tmp,
&did_prefetches);
if (ret == 0) {
xorbufout(out, in, aes->tmp, sz);
XMEMCPY(aes->reg, out, sz);
@@ -14004,6 +14151,9 @@ static WARN_UNUSED_RESULT int AesCfbDecrypt_C(Aes* aes, byte* out,
{
int ret = 0;
word32 processed;
#ifdef WC_AES_HAVE_PREFETCH_ARG
int did_prefetches = 0;
#endif
(void)mode;
@@ -14050,7 +14200,8 @@ static WARN_UNUSED_RESULT int AesCfbDecrypt_C(Aes* aes, byte* out,
}
#endif
while (sz >= WC_AES_BLOCK_SIZE) {
ret = wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
ret = AesEncrypt_preFetchOpt(aes, (byte*)aes->reg, (byte*)aes->tmp,
&did_prefetches);
if (ret != 0) {
break;
}
@@ -14063,7 +14214,8 @@ static WARN_UNUSED_RESULT int AesCfbDecrypt_C(Aes* aes, byte* out,
/* decrypt left over data */
if ((ret == 0) && sz) {
ret = wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
ret = AesEncrypt_preFetchOpt(aes, (byte*)aes->reg, (byte*)aes->tmp,
&did_prefetches);
if (ret == 0) {
XMEMCPY(aes->reg, in, sz);
xorbufout(out, in, aes->tmp, sz);
@@ -14144,6 +14296,9 @@ static WARN_UNUSED_RESULT int wc_AesFeedbackCFB8(
{
byte *pt;
int ret = 0;
#ifdef WC_AES_HAVE_PREFETCH_ARG
int did_prefetches = 0;
#endif
if (aes == NULL || out == NULL || in == NULL) {
return BAD_FUNC_ARG;
@@ -14156,7 +14311,8 @@ static WARN_UNUSED_RESULT int wc_AesFeedbackCFB8(
VECTOR_REGISTERS_PUSH;
while (sz > 0) {
ret = wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
ret = AesEncrypt_preFetchOpt(aes, (byte*)aes->reg, (byte*)aes->tmp,
&did_prefetches);
if (ret != 0)
break;
if (dir == AES_DECRYPTION) {
@@ -14200,6 +14356,9 @@ static WARN_UNUSED_RESULT int wc_AesFeedbackCFB1(
byte* pt;
int bit = 7;
int ret = 0;
#ifdef WC_AES_HAVE_PREFETCH_ARG
int did_prefetches = 0;
#endif
if (aes == NULL || out == NULL || in == NULL) {
return BAD_FUNC_ARG;
@@ -14212,7 +14371,8 @@ static WARN_UNUSED_RESULT int wc_AesFeedbackCFB1(
VECTOR_REGISTERS_PUSH;
while (sz > 0) {
ret = wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
ret = AesEncrypt_preFetchOpt(aes, (byte*)aes->reg, (byte*)aes->tmp,
&did_prefetches);
if (ret != 0)
break;
if (dir == AES_DECRYPTION) {
@@ -14351,6 +14511,9 @@ static WARN_UNUSED_RESULT int AesOfbCrypt_C(Aes* aes, byte* out, const byte* in,
{
int ret = 0;
word32 processed;
#ifdef WC_AES_HAVE_PREFETCH_ARG
int did_prefetches = 0;
#endif
if ((aes == NULL) || (out == NULL) || (in == NULL)) {
return BAD_FUNC_ARG;
@@ -14373,7 +14536,8 @@ static WARN_UNUSED_RESULT int AesOfbCrypt_C(Aes* aes, byte* out, const byte* in,
VECTOR_REGISTERS_PUSH;
while (sz >= WC_AES_BLOCK_SIZE) {
ret = wc_AesEncryptDirect(aes, (byte*)aes->reg, (byte*)aes->reg);
ret = AesEncrypt_preFetchOpt(aes, (byte*)aes->reg, (byte*)aes->reg,
&did_prefetches);
if (ret != 0) {
break;
}
@@ -14385,7 +14549,8 @@ static WARN_UNUSED_RESULT int AesOfbCrypt_C(Aes* aes, byte* out, const byte* in,
/* encrypt left over data */
if ((ret == 0) && sz) {
ret = wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
ret = AesEncrypt_preFetchOpt(aes, (byte*)aes->reg, (byte*)aes->tmp,
&did_prefetches);
if (ret == 0) {
XMEMCPY(aes->reg, aes->tmp, WC_AES_BLOCK_SIZE);
xorbufout(out, in, aes->tmp, sz);
@@ -16096,6 +16261,45 @@ int wc_AesXtsDecryptConsecutiveSectors(XtsAes* aes, byte* out, const byte* in,
#endif /* HAVE_AES_DECRYPT */
#endif /* WOLFSSL_AES_XTS */
#ifdef WOLFSSL_CMAC
int wc_local_CmacUpdateAes(struct Cmac *cmac, const byte* in, word32 inSz) {
int ret = 0;
Aes *aes = &cmac->aes;
#ifdef WC_AES_HAVE_PREFETCH_ARG
int did_prefetches = 0;
#endif
VECTOR_REGISTERS_PUSH;
while ((ret == 0) && (inSz != 0)) {
word32 add = min(inSz, WC_AES_BLOCK_SIZE - cmac->bufferSz);
XMEMCPY(&cmac->buffer[cmac->bufferSz], in, add);
cmac->bufferSz += add;
inSz -= add;
in += add;
if (cmac->bufferSz == WC_AES_BLOCK_SIZE && inSz != 0) {
if (cmac->totalSz != 0) {
xorbuf(cmac->buffer, cmac->digest, WC_AES_BLOCK_SIZE);
}
ret = AesEncrypt_preFetchOpt(aes, cmac->buffer,
cmac->digest, &did_prefetches);
if (ret == 0) {
cmac->totalSz += WC_AES_BLOCK_SIZE;
cmac->bufferSz = 0;
}
}
}
VECTOR_REGISTERS_POP;
return ret;
}
#endif /* WOLFSSL_CMAC */
#ifdef WOLFSSL_AES_SIV
/*
+5 -9
View File
@@ -228,6 +228,7 @@ int wc_CmacUpdate(Cmac* cmac, const byte* in, word32 inSz)
#if !defined(NO_AES) && defined(WOLFSSL_AES_DIRECT)
case WC_CMAC_AES:
{
#ifdef HAVE_SELFTEST
while ((ret == 0) && (inSz != 0)) {
word32 add = min(inSz, WC_AES_BLOCK_SIZE - cmac->bufferSz);
XMEMCPY(&cmac->buffer[cmac->bufferSz], in, add);
@@ -240,21 +241,16 @@ int wc_CmacUpdate(Cmac* cmac, const byte* in, word32 inSz)
if (cmac->totalSz != 0) {
xorbuf(cmac->buffer, cmac->digest, WC_AES_BLOCK_SIZE);
}
#ifndef HAVE_SELFTEST
ret = wc_AesEncryptDirect(&cmac->aes, cmac->digest,
cmac->buffer);
if (ret == 0) {
cmac->totalSz += WC_AES_BLOCK_SIZE;
cmac->bufferSz = 0;
}
#else
wc_AesEncryptDirect(&cmac->aes, cmac->digest,
cmac->buffer);
cmac->totalSz += WC_AES_BLOCK_SIZE;
cmac->bufferSz = 0;
#endif
}
}
#else
(void)ret;
ret = wc_local_CmacUpdateAes(cmac, in, inSz);
#endif
}; break;
#endif /* !NO_AES && WOLFSSL_AES_DIRECT */
default:
+7
View File
@@ -806,6 +806,13 @@ int wc_AesSivDecrypt_ex(const byte* key, word32 keySz, const AesSivAssoc* assoc,
const byte* in, word32 inSz, byte* siv, byte* out);
#endif
#ifdef WOLFSSL_CMAC
/* forward declaration, in case aes.h is being included by cmac.h */
struct Cmac;
WOLFSSL_LOCAL int wc_local_CmacUpdateAes(struct Cmac *cmac, const byte* in,
word32 inSz);
#endif
#ifdef WOLFSSL_AES_EAX
/* Because of the circular dependency between AES and CMAC, we need to prevent
+1 -7
View File
@@ -102,13 +102,7 @@
#define WOLFSSL_NO_HASH_RAW
#endif
#if defined(_MSC_VER)
#define SHA256_NOINLINE __declspec(noinline)
#elif defined(__IAR_SYSTEMS_ICC__) || defined(__GNUC__)
#define SHA256_NOINLINE __attribute__((noinline))
#else
#define SHA256_NOINLINE
#endif
#define SHA256_NOINLINE WC_NO_INLINE
#if !defined(NO_OLD_SHA_NAMES)
#define SHA256 WC_SHA256
+1 -7
View File
@@ -80,13 +80,7 @@
#include <wolfssl/wolfcrypt/port/st/stm32.h>
#endif
#if defined(_MSC_VER)
#define SHA512_NOINLINE __declspec(noinline)
#elif defined(__IAR_SYSTEMS_ICC__) || defined(__GNUC__)
#define SHA512_NOINLINE __attribute__((noinline))
#else
#define SHA512_NOINLINE
#endif
#define SHA512_NOINLINE WC_NO_INLINE
#ifdef WOLFSSL_SHA512
+1 -12
View File
@@ -48,18 +48,7 @@
#undef WOLFSSL_HAVE_SP_ECC
#endif
#ifdef noinline
#define SP_NOINLINE noinline
#elif defined(_MSC_VER)
#define SP_NOINLINE __declspec(noinline)
#elif defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
#define SP_NOINLINE _Pragma("inline = never")
#elif defined(__GNUC__) || defined(__KEIL__) || defined(__DCC__)
#define SP_NOINLINE __attribute__((noinline))
#else
#define SP_NOINLINE
#endif
#define SP_NOINLINE WC_NO_INLINE
#ifdef __cplusplus
extern "C" {
+27
View File
@@ -2056,6 +2056,33 @@ WOLFSSL_API word32 CheckRunTimeSettings(void);
#define WC_NORETURN
#endif
#ifdef __has_attribute
#if __has_attribute(nonnull)
#ifndef WC_ARG_NOT_NULL
#define WC_ARG_NOT_NULL(a) __attribute__((nonnull(a)))
#endif
#ifndef WC_ARGS_NOT_NULL
/* double-parenthesize, a la WC_ARGS_NOT_NULL((1, 2)) -- this approach
* maintains compatibility with WOLF_NO_VARIADIC_MACROS.
*/
#define WC_ARGS_NOT_NULL(p_a) __attribute__((nonnull p_a))
#endif
#ifndef WC_ALL_ARGS_NOT_NULL
#define WC_ALL_ARGS_NOT_NULL __attribute__((nonnull))
#endif
#endif /* __has_attribute(nonnull) */
#endif /* defined(__has_attribute) */
#ifndef WC_ARG_NOT_NULL
#define WC_ARG_NOT_NULL(a) /* null expansion */
#endif
#ifndef WC_ARGS_NOT_NULL
#define WC_ARGS_NOT_NULL(p_a) /* null expansion */
#endif
#ifndef WC_ALL_ARGS_NOT_NULL
#define WC_ALL_ARGS_NOT_NULL
#endif
#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
defined(WOLFSSL_DEBUG_MATH) || defined(DEBUG_WOLFSSL) || \
defined(WOLFSSL_PUBLIC_MP) || defined(OPENSSL_EXTRA) || \
+1 -9
View File
@@ -44,15 +44,7 @@
#define WOLFSSL_MLKEM_NO_DECAPSULATE
#endif
#ifdef noinline
#define MLKEM_NOINLINE noinline
#elif defined(_MSC_VER)
#define MLKEM_NOINLINE __declspec(noinline)
#elif defined(__GNUC__)
#define MLKEM_NOINLINE __attribute__((noinline))
#else
#define MLKEM_NOINLINE
#endif
#define MLKEM_NOINLINE WC_NO_INLINE
enum {
/* Flags of Kyber keys. */
+14
View File
@@ -143,6 +143,20 @@
#endif
#endif
#ifndef WC_NO_INLINE
#ifdef noinline
#define WC_NO_INLINE noinline
#elif defined(_MSC_VER)
#define WC_NO_INLINE __declspec(noinline)
#elif defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
#define WC_NO_INLINE _Pragma("inline = never")
#elif defined(__GNUC__) || defined(__KEIL__) || defined(__DCC__)
#define WC_NO_INLINE __attribute__((noinline))
#else
#define WC_NO_INLINE
#endif
#endif
#ifndef WC_OMIT_FRAME_POINTER
#if defined(__GNUC__)
#define WC_OMIT_FRAME_POINTER \