ARMv8 : AES-CTR/CBC/GCM speed ups and refactor AES

Jacob Barthelmeh
2016-09-15 06:03:48 +00:00
parent 78c0f98ea9
commit 6d82cba29c
5 changed files with 2254 additions and 464 deletions

View File

@@ -89,8 +89,12 @@ endif
endif
if BUILD_AES
if BUILD_ARMASM
src_libwolfssl_la_SOURCES += wolfcrypt/src/port/arm/armv8-aes.c
else
src_libwolfssl_la_SOURCES += wolfcrypt/src/aes.c
endif
endif
if BUILD_CMAC
src_libwolfssl_la_SOURCES += wolfcrypt/src/cmac.c

View File

@@ -344,153 +344,6 @@ void wc_AesAsyncFree(Aes* aes)
#ifdef HAVE_AES_DECRYPT
#error nRF51 AES Hardware does not support decrypt
#endif /* HAVE_AES_DECRYPT */
#elif defined(WOLFSSL_ARMASM)
static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
byte* keyPt = (byte*)aes->key;
word32 rounds = aes->rounds;
/*
AESE XORs the input with the round key,
shifts the rows of the XORed result,
and substitutes bytes of the shifted rows
*/
__asm__ __volatile__ (
"LD1 {v0.16b}, [%[CtrIn]] \n"
"LD1 {v1.16b-v4.16b}, [%[Key]], #64 \n"
"AESE v0.16b, v1.16b \n"
"AESMC v0.16b, v0.16b \n"
"AESE v0.16b, v2.16b \n"
"AESMC v0.16b, v0.16b \n"
"AESE v0.16b, v3.16b \n"
"AESMC v0.16b, v0.16b \n"
"AESE v0.16b, v4.16b \n"
"AESMC v0.16b, v0.16b \n"
"LD1 {v1.16b-v4.16b}, [%[Key]], #64 \n"
"AESE v0.16b, v1.16b \n"
"AESMC v0.16b, v0.16b \n"
"AESE v0.16b, v2.16b \n"
"AESMC v0.16b, v0.16b \n"
"AESE v0.16b, v3.16b \n"
"AESMC v0.16b, v0.16b \n"
"AESE v0.16b, v4.16b \n"
"AESMC v0.16b, v0.16b \n"
"LD1 {v1.16b-v2.16b}, [%[Key]], #32 \n"
"AESE v0.16b, v1.16b \n"
"AESMC v0.16b, v0.16b \n"
"AESE v0.16b, v2.16b \n"
"#subtract rounds done so far and see if should continue\n"
"MOV w12, %w[R] \n"
"SUB w12, w12, #10 \n"
"CBZ w12, final \n"
"LD1 {v1.16b-v2.16b}, [%[Key]], #32 \n"
"AESMC v0.16b, v0.16b \n"
"AESE v0.16b, v1.16b \n"
"AESMC v0.16b, v0.16b \n"
"AESE v0.16b, v2.16b \n"
"SUB w12, w12, #2 \n"
"CBZ w12, final \n"
"LD1 {v1.16b-v2.16b}, [%[Key]], #32 \n"
"AESMC v0.16b, v0.16b \n"
"AESE v0.16b, v1.16b \n"
"AESMC v0.16b, v0.16b \n"
"AESE v0.16b, v2.16b \n"
"#Final AddRoundKey then store result \n"
"final: \n"
"LD1 {v1.16b}, [%[Key]], #16 \n"
"EOR v0.16b, v0.16b, v1.16b \n"
"ST1 {v0.16b}, [%[CtrOut]] \n"
:[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (rounds),
"=r" (inBlock)
:"0" (outBlock), [Key] "1" (keyPt), [R] "2" (rounds),
[CtrIn] "3" (inBlock)
: "cc", "memory", "w12"
);
return 0;
}
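/* Illustrative sketch (not part of this diff): driving the single-block
 * routine above through the public direct API. Assumes the build defines
 * WOLFSSL_AES_DIRECT so wc_AesSetKeyDirect()/wc_AesEncryptDirect() are
 * available; the helper name is a placeholder and error handling is minimal. */
#include <wolfssl/wolfcrypt/aes.h>

static int encrypt_one_block(const byte* key16, const byte* in16, byte* out16)
{
    Aes aes;
    int ret = wc_AesSetKeyDirect(&aes, key16, 16, NULL, AES_ENCRYPTION);
    if (ret != 0)
        return ret;
    wc_AesEncryptDirect(&aes, out16, in16); /* one AES_BLOCK_SIZE block */
    return 0;
}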
#ifdef HAVE_AES_DECRYPT
static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
byte* keyPt = (byte*)aes->key;
word32 rounds = aes->rounds;
/*
AESD XORs the input with the round key,
applies inverse shift rows to the XORed result,
and applies inverse byte substitution to the shifted rows
*/
__asm__ __volatile__ (
"LD1 {v0.16b}, [%[CtrIn]] \n"
"LD1 {v1.16b-v4.16b}, [%[Key]], #64 \n"
"AESD v0.16b, v1.16b \n"
"AESIMC v0.16b, v0.16b \n"
"AESD v0.16b, v2.16b \n"
"AESIMC v0.16b, v0.16b \n"
"AESD v0.16b, v3.16b \n"
"AESIMC v0.16b, v0.16b \n"
"AESD v0.16b, v4.16b \n"
"AESIMC v0.16b, v0.16b \n"
"LD1 {v1.16b-v4.16b}, [%[Key]], #64 \n"
"AESD v0.16b, v1.16b \n"
"AESIMC v0.16b, v0.16b \n"
"AESD v0.16b, v2.16b \n"
"AESIMC v0.16b, v0.16b \n"
"AESD v0.16b, v3.16b \n"
"AESIMC v0.16b, v0.16b \n"
"AESD v0.16b, v4.16b \n"
"AESIMC v0.16b, v0.16b \n"
"LD1 {v1.16b-v2.16b}, [%[Key]], #32 \n"
"AESD v0.16b, v1.16b \n"
"AESIMC v0.16b, v0.16b \n"
"AESD v0.16b, v2.16b \n"
"#subtract rounds done so far and see if should continue\n"
"MOV w12, %w[R] \n"
"SUB w12, w12, #10 \n"
"CBZ w12, finalDec \n"
"LD1 {v1.16b-v2.16b}, [%[Key]], #32 \n"
"AESIMC v0.16b, v0.16b \n"
"AESD v0.16b, v1.16b \n"
"AESIMC v0.16b, v0.16b \n"
"AESD v0.16b, v2.16b \n"
"SUB w12, w12, #2 \n"
"CBZ w12, finalDec \n"
"LD1 {v1.16b-v2.16b}, [%[Key]], #32 \n"
"AESIMC v0.16b, v0.16b \n"
"AESD v0.16b, v1.16b \n"
"AESIMC v0.16b, v0.16b \n"
"AESD v0.16b, v2.16b \n"
"#Final AddRoundKey then store result \n"
"finalDec: \n"
"LD1 {v1.16b}, [%[Key]], #16 \n"
"EOR v0.16b, v0.16b, v1.16b \n"
"ST1 {v0.4s}, [%[CtrOut]] \n"
:[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (rounds),
"=r" (inBlock)
:"0" (outBlock), [Key] "1" (keyPt), [R] "2" (rounds),
[CtrIn] "3" (inBlock)
: "cc", "memory", "w12"
);
return 0;
}
#endif /* HAVE_AES_DECRYPT */
#else
/* using wolfCrypt software AES implementation */
@@ -1794,196 +1647,6 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
return wc_AesSetKey(aes, userKey, keylen, iv, dir);
}
#elif defined(WOLFSSL_ARMASM)
static const byte rcon[] = {
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,0x1B, 0x36
/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
};
/* Similar to the wolfSSL software implementation of expanding the AES key.
* Changed the locations where table lookups were made to use the hardware
* instruction instead. Also altered the decryption key setup to match. */
int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
const byte* iv, int dir)
{
word32 temp, *rk = aes->key;
unsigned int i = 0;
#if defined(AES_MAX_KEY_SIZE)
const word32 max_key_len = (AES_MAX_KEY_SIZE / 8);
#endif
if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
return BAD_FUNC_ARG;
#if defined(AES_MAX_KEY_SIZE)
/* Check key length */
if (keylen > max_key_len) {
return BAD_FUNC_ARG;
}
#endif
#ifdef WOLFSSL_AES_COUNTER
aes->left = 0;
#endif /* WOLFSSL_AES_COUNTER */
aes->rounds = keylen/4 + 6;
XMEMCPY(rk, userKey, keylen);
switch(keylen)
{
#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 128
case 16:
while (1)
{
temp = rk[3];
/* get table value from hardware */
__asm__ volatile (
"DUP v1.4s, %w[in] \n"
"MOVI v0.16b, #0 \n"
"AESE v0.16b, v1.16b \n"
"UMOV %w[out], v0.4s[0] \n"
: [out] "=r"(temp)
: [in] "r" (temp)
: "cc", "memory", "v0", "v1"
);
temp = rotrFixed(temp, 8);
rk[4] = rk[0] ^ temp ^ rcon[i];
rk[5] = rk[4] ^ rk[1];
rk[6] = rk[5] ^ rk[2];
rk[7] = rk[6] ^ rk[3];
if (++i == 10)
break;
rk += 4;
}
break;
#endif /* 128 */
#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 192
case 24:
/* for (;;) here triggers a bug in VC60 SP4 w/ Pro Pack */
while (1)
{
temp = rk[5];
/* get table value from hardware */
__asm__ volatile (
"DUP v1.4s, %w[in] \n"
"MOVI v0.16b, #0 \n"
"AESE v0.16b, v1.16b \n"
"UMOV %w[out], v0.4s[0] \n"
: [out] "=r"(temp)
: [in] "r" (temp)
: "cc", "memory", "v0", "v1"
);
temp = rotrFixed(temp, 8);
rk[ 6] = rk[ 0] ^ temp ^ rcon[i];
rk[ 7] = rk[ 1] ^ rk[ 6];
rk[ 8] = rk[ 2] ^ rk[ 7];
rk[ 9] = rk[ 3] ^ rk[ 8];
if (++i == 8)
break;
rk[10] = rk[ 4] ^ rk[ 9];
rk[11] = rk[ 5] ^ rk[10];
rk += 6;
}
break;
#endif /* 192 */
#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 256
case 32:
while (1)
{
temp = rk[7];
/* get table value from hardware */
__asm__ volatile (
"DUP v1.4s, %w[in] \n"
"MOVI v0.16b, #0 \n"
"AESE v0.16b, v1.16b \n"
"UMOV %w[out], v0.4s[0] \n"
: [out] "=r"(temp)
: [in] "r" (temp)
: "cc", "memory", "v0", "v1"
);
temp = rotrFixed(temp, 8);
rk[8] = rk[0] ^ temp ^ rcon[i];
rk[ 9] = rk[ 1] ^ rk[ 8];
rk[10] = rk[ 2] ^ rk[ 9];
rk[11] = rk[ 3] ^ rk[10];
if (++i == 7)
break;
temp = rk[11];
/* get table value from hardware */
__asm__ volatile (
"DUP v1.4s, %w[in] \n"
"MOVI v0.16b, #0 \n"
"AESE v0.16b, v1.16b \n"
"UMOV %w[out], v0.4s[0] \n"
: [out] "=r"(temp)
: [in] "r" (temp)
: "cc", "memory", "v0", "v1"
);
rk[12] = rk[ 4] ^ temp;
rk[13] = rk[ 5] ^ rk[12];
rk[14] = rk[ 6] ^ rk[13];
rk[15] = rk[ 7] ^ rk[14];
rk += 8;
}
break;
#endif /* 256 */
default:
return BAD_FUNC_ARG;
}
if (dir == AES_DECRYPTION)
{
#ifdef HAVE_AES_DECRYPT
unsigned int j;
rk = aes->key;
/* invert the order of the round keys: */
for (i = 0, j = 4* aes->rounds; i < j; i += 4, j -= 4) {
temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
}
/* apply the inverse MixColumn transform to all round keys but the
first and the last: */
for (i = 1; i < aes->rounds; i++) {
rk += 4;
__asm__ volatile (
"LD1 {v0.16b}, [%[in]] \n"
"AESIMC v0.16b, v0.16b \n"
"ST1 {v0.16b}, [%[out]]\n"
: [out] "=r" (rk)
: [in] "0" (rk)
: "cc", "memory", "v0"
);
}
#else
WOLFSSL_MSG("AES Decryption not compiled in");
return BAD_FUNC_ARG;
#endif /* HAVE_AES_DECRYPT */
}
return wc_AesSetIV(aes, iv);
}
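/* Reference note (a restatement of FIPS-197, not introduced by this patch):
 * the 128-bit branch above computes
 *     rk[4] = rk[0] ^ rotr8(SubWord(rk[3])) ^ rcon[i]
 * The DUP/MOVI/AESE/UMOV snippet yields SubWord(temp): with every lane of the
 * state holding the same word and the round key set to zero, AESE's ShiftRows
 * step is a no-op, leaving only the S-box substitution. Because SubWord works
 * bytewise it commutes with rotation, so RotWord can be applied afterwards as
 * rotrFixed(temp, 8). */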
#if defined(WOLFSSL_AES_DIRECT)
int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
const byte* iv, int dir)
{
return wc_AesSetKey(aes, userKey, keylen, iv, dir);
}
#endif
#else
static int wc_AesSetKeyLocal(Aes* aes, const byte* userKey, word32 keylen,
const byte* iv, int dir)
@@ -3165,7 +2828,7 @@ static INLINE void IncrementGcmCounter(byte* inOutCtr)
}
#if defined(GCM_SMALL) || defined(GCM_TABLE) || defined(WOLFSSL_ARMASM)
#if defined(GCM_SMALL) || defined(GCM_TABLE)
static INLINE void FlattenSzInBits(byte* buf, word32 sz)
{
@@ -3249,20 +2912,6 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
if (ret == 0) {
wc_AesEncrypt(aes, iv, aes->H);
#if defined(WOLFSSL_ARMASM) && defined(__aarch64__)
{
word32* pt = (word32*)aes->H;
__asm__ volatile (
"LD1 {v0.16b}, [%[h]] \n"
"RBIT v0.16b, v0.16b \n"
"ST1 {v0.16b}, [%[out]] \n"
: [out] "=r" (pt)
: [h] "0" (pt)
: "cc", "memory"
);
return ret; /* no need to generate GCM_TABLE */
}
#endif
#ifdef GCM_TABLE
GenerateM0(aes);
#endif /* GCM_TABLE */
@@ -3699,118 +3348,7 @@ static int AES_GCM_decrypt(const unsigned char *in,
#endif /* WOLFSSL_AESNI */
#if defined(WOLFSSL_ARMASM) && defined(__aarch64__)
/* PMULL and RBIT only with AArch64 */
/* Use ARM hardware for polynomial multiply */
static void GMULT(byte* X, byte* Y)
{
word32* Xpt = (word32*)X;
word32* Ypt = (word32*)Y;
__asm__ volatile (
"LD1 {v0.16b}, [%[inX]] \n"
"LD1 {v1.16b}, [%[inY]] \n" /* v1 already reflected from set key */
"RBIT v0.16b, v0.16b \n"
/* Algorithm 1 from Intel GCM white paper.
"Carry-Less Multiplication and Its Usage for Computing the GCM Mode"
*/
"PMULL v3.1q, v0.1d, v1.1d \n" /* a0 * b0 = C */
"PMULL2 v4.1q, v0.2d, v1.2d \n" /* a1 * b1 = D */
"EXT v5.16b, v1.16b, v1.16b, #8 \n" /* b0b1 -> b1b0 */
"PMULL v6.1q, v0.1d, v5.1d \n" /* a0 * b1 = E */
"PMULL2 v5.1q, v0.2d, v5.2d \n" /* a1 * b0 = F */
"#Set a register to all 0s using EOR \n"
"EOR v7.16b, v7.16b, v7.16b \n"
"EOR v5.16b, v5.16b, v6.16b \n" /* F ^ E */
"EXT v6.16b, v7.16b, v5.16b, #8 \n" /* get (F^E)[0] */
"EOR v3.16b, v3.16b, v6.16b \n" /* low 128 bits in v3 */
"EXT v6.16b, v5.16b, v7.16b, #8 \n" /* get (F^E)[1] */
"EOR v4.16b, v4.16b, v6.16b \n" /* high 128 bits in v4 */
/* Based on the white paper "Implementing GCM on ARMv8"
by Conrado P. L. Gouvea and Julio Lopez:
reduction of the 256-bit value using Algorithm 5 */
"MOVI v8.16b, #0x87 \n"
"USHR v8.2d, v8.2d, #56 \n"
/* v8 is now 0x00000000000000870000000000000087 reflected 0xe1....*/
"PMULL2 v5.1q, v4.2d, v8.2d \n"
"EXT v6.16b, v5.16b, v7.16b, #8 \n" /* v7 is all 0's */
"EOR v4.16b, v4.16b, v6.16b \n"
"EXT v6.16b, v7.16b, v5.16b, #8 \n"
"EOR v3.16b, v3.16b, v6.16b \n"
"PMULL v5.1q, v4.1d, v8.1d \n"
"EOR v4.16b, v3.16b, v5.16b \n"
"RBIT v4.16b, v4.16b \n"
"STR q4, [%[out]] \n"
: [out] "=r" (Xpt), "=r" (Ypt)
: [inX] "0" (Xpt), [inY] "1" (Ypt)
: "cc", "memory", "v3", "v4", "v5", "v6", "v7", "v8"
);
}
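/* Background note (a restatement of the cited papers, not new code): with
 * X = a1*x^64 + a0 and Y = b1*x^64 + b0, the schoolbook carry-less product is
 *     X*Y = D*x^128 + (E ^ F)*x^64 + C
 * where C = a0*b0, D = a1*b1, E = a0*b1 and F = a1*b0, matching the PMULL/
 * PMULL2 results above. The 256-bit result is then reduced modulo the GHASH
 * polynomial x^128 + x^7 + x^2 + x + 1; the 0x87 constant loaded into v8 is
 * the bit pattern of x^7 + x^2 + x + 1 used for that reduction (Algorithm 5
 * of the Gouvea/Lopez paper). */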
/* Currently a copy of the GCM_SMALL wolfSSL version. Duplicated and kept
* separate for future optimizations. */
static void GHASH(Aes* aes, const byte* a, word32 aSz,
const byte* c, word32 cSz, byte* s, word32 sSz)
{
byte x[AES_BLOCK_SIZE];
byte scratch[AES_BLOCK_SIZE];
word32 blocks, partial;
byte* h = aes->H;
XMEMSET(x, 0, AES_BLOCK_SIZE);
/* Hash in A, the Additional Authentication Data */
if (aSz != 0 && a != NULL) {
blocks = aSz / AES_BLOCK_SIZE;
partial = aSz % AES_BLOCK_SIZE;
while (blocks--) {
xorbuf(x, a, AES_BLOCK_SIZE);
GMULT(x, h);
a += AES_BLOCK_SIZE;
}
if (partial != 0) {
XMEMSET(scratch, 0, AES_BLOCK_SIZE);
XMEMCPY(scratch, a, partial);
xorbuf(x, scratch, AES_BLOCK_SIZE);
GMULT(x, h);
}
}
/* Hash in C, the Ciphertext */
if (cSz != 0 && c != NULL) {
blocks = cSz / AES_BLOCK_SIZE;
partial = cSz % AES_BLOCK_SIZE;
while (blocks--) {
xorbuf(x, c, AES_BLOCK_SIZE);
GMULT(x, h);
c += AES_BLOCK_SIZE;
}
if (partial != 0) {
XMEMSET(scratch, 0, AES_BLOCK_SIZE);
XMEMCPY(scratch, c, partial);
xorbuf(x, scratch, AES_BLOCK_SIZE);
GMULT(x, h);
}
}
/* Hash in the lengths of A and C in bits */
FlattenSzInBits(&scratch[0], aSz);
FlattenSzInBits(&scratch[8], cSz);
xorbuf(x, scratch, AES_BLOCK_SIZE);
GMULT(x, h);
/* Copy the result into s. */
XMEMCPY(s, x, sSz);
}
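/* Illustrative sketch (not part of this diff): how GMULT/GHASH above are
 * exercised through the public AEAD API. The helper name and buffer names are
 * placeholders; error handling is minimal. */
#include <wolfssl/wolfcrypt/aes.h>

static int gcm_seal(const byte* key, word32 keySz,
                    const byte* iv, word32 ivSz,
                    const byte* aad, word32 aadSz,
                    const byte* msg, word32 msgSz,
                    byte* out, byte* tag /* AES_BLOCK_SIZE bytes */)
{
    Aes aes;
    int ret = wc_AesGcmSetKey(&aes, key, keySz);
    if (ret == 0)
        ret = wc_AesGcmEncrypt(&aes, out, msg, msgSz, iv, ivSz,
                               tag, AES_BLOCK_SIZE, aad, aadSz);
    return ret;
}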
/* not using ARMASM for multiplication */
#elif defined(GCM_SMALL)
#if defined(GCM_SMALL)
static void GMULT(byte* X, byte* Y)
{
byte Z[AES_BLOCK_SIZE];

File diff suppressed because it is too large.

View File

@@ -69,6 +69,10 @@ int wolfCrypt_Init()
}
#endif
#ifdef WOLFSSL_ARMASM
WOLFSSL_MSG("Using ARM hardware acceleration");
#endif
initRefCount = 1;
}

View File

@@ -2815,6 +2815,7 @@ int aes_test(void)
ret = wc_AesCbcEncrypt(&enc, bigCipher, bigMsg, msgSz);
if (ret != 0)
return -1032;
ret = wc_AesCbcDecrypt(&dec, bigPlain, bigCipher, msgSz);
if (ret != 0)
return -1033;
@@ -2878,6 +2879,64 @@ int aes_test(void)
0xc2
};
/* test vector from "Recommendation for Block Cipher Modes of Operation"
* NIST Special Publication 800-38A */
const byte ctr192Key[] =
{
0x8e,0x73,0xb0,0xf7,0xda,0x0e,0x64,0x52,
0xc8,0x10,0xf3,0x2b,0x80,0x90,0x79,0xe5,
0x62,0xf8,0xea,0xd2,0x52,0x2c,0x6b,0x7b
};
const byte ctr192Iv[] =
{
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
};
const byte ctr192Plain[] =
{
0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a
};
const byte ctr192Cipher[] =
{
0x1a,0xbc,0x93,0x24,0x17,0x52,0x1c,0xa2,
0x4f,0x2b,0x04,0x59,0xfe,0x7e,0x6e,0x0b
};
/* test vector from "Recommendation for Block Cipher Modes of Operation"
* NIST Special Publication 800-38A */
const byte ctr256Key[] =
{
0x60,0x3d,0xeb,0x10,0x15,0xca,0x71,0xbe,
0x2b,0x73,0xae,0xf0,0x85,0x7d,0x77,0x81,
0x1f,0x35,0x2c,0x07,0x3b,0x61,0x08,0xd7,
0x2d,0x98,0x10,0xa3,0x09,0x14,0xdf,0xf4
};
const byte ctr256Iv[] =
{
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
};
const byte ctr256Plain[] =
{
0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a
};
const byte ctr256Cipher[] =
{
0x60,0x1e,0xc3,0x13,0x77,0x57,0x89,0xa5,
0xb7,0xa7,0xf5,0x04,0xbb,0xf3,0xd2,0x28
};
wc_AesSetKeyDirect(&enc, ctrKey, AES_BLOCK_SIZE, ctrIv, AES_ENCRYPTION);
/* Ctr only uses encrypt, even on key setup */
wc_AesSetKeyDirect(&dec, ctrKey, AES_BLOCK_SIZE, ctrIv, AES_ENCRYPTION);
@@ -2914,6 +2973,40 @@ int aes_test(void)
if (XMEMCMP(cipher, oddCipher, 9))
return -71;
/* 192 bit key */
wc_AesSetKeyDirect(&enc, ctr192Key, sizeof(ctr192Key),
ctr192Iv, AES_ENCRYPTION);
/* Ctr only uses encrypt, even on key setup */
wc_AesSetKeyDirect(&dec, ctr192Key, sizeof(ctr192Key),
ctr192Iv, AES_ENCRYPTION);
XMEMSET(plain, 0, sizeof(plain));
wc_AesCtrEncrypt(&enc, plain, ctr192Cipher, sizeof(ctr192Cipher));
if (XMEMCMP(plain, ctr192Plain, sizeof(ctr192Plain)))
return -72;
wc_AesCtrEncrypt(&dec, cipher, ctr192Plain, sizeof(ctr192Plain));
if (XMEMCMP(ctr192Cipher, cipher, sizeof(ctr192Cipher)))
return -73;
/* 256 bit key */
wc_AesSetKeyDirect(&enc, ctr256Key, sizeof(ctr256Key),
ctr256Iv, AES_ENCRYPTION);
/* Ctr only uses encrypt, even on key setup */
wc_AesSetKeyDirect(&dec, ctr256Key, sizeof(ctr256Key),
ctr256Iv, AES_ENCRYPTION);
XMEMSET(plain, 0, sizeof(plain));
wc_AesCtrEncrypt(&enc, plain, ctr256Cipher, sizeof(ctr256Cipher));
if (XMEMCMP(plain, ctr256Plain, sizeof(ctr256Plain)))
return -74;
wc_AesCtrEncrypt(&dec, cipher, ctr256Plain, sizeof(ctr256Plain));
if (XMEMCMP(ctr256Cipher, cipher, sizeof(ctr256Cipher)))
return -75;
}
#endif /* WOLFSSL_AES_COUNTER */
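/* Illustrative sketch (not part of this diff): CTR-mode streaming as the
 * odd-sized calls above exercise it. Consecutive wc_AesCtrEncrypt() calls may
 * use any length; leftover key-stream bytes are carried in the Aes object,
 * and decryption uses the same routine with a key scheduled for encryption.
 * The helper name and chunk split are placeholders; return checks omitted. */
#include <wolfssl/wolfcrypt/aes.h>

static void ctr_stream_example(const byte* key16, const byte* iv16,
                               const byte* msg, word32 msgSz, byte* out)
{
    Aes aes;
    word32 firstChunk = 9; /* deliberately not a multiple of AES_BLOCK_SIZE */

    wc_AesSetKeyDirect(&aes, key16, 16, iv16, AES_ENCRYPTION);
    if (msgSz <= firstChunk) {
        wc_AesCtrEncrypt(&aes, out, msg, msgSz);
        return;
    }
    wc_AesCtrEncrypt(&aes, out, msg, firstChunk);
    wc_AesCtrEncrypt(&aes, out + firstChunk, msg + firstChunk,
                     msgSz - firstChunk);
}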