mirror of
https://github.com/wolfSSL/wolfssl.git
synced 2025-08-03 12:44:45 +02:00
ARMv8 : AES-CTR/CBC/GCM speed ups and refactor AES
This commit is contained in:
@@ -89,8 +89,12 @@ endif
|
||||
endif
|
||||
|
||||
if BUILD_AES
|
||||
if BUILD_ARMASM
|
||||
src_libwolfssl_la_SOURCES += wolfcrypt/src/port/arm/armv8-aes.c
|
||||
else
|
||||
src_libwolfssl_la_SOURCES += wolfcrypt/src/aes.c
|
||||
endif
|
||||
endif
|
||||
|
||||
if BUILD_CMAC
|
||||
src_libwolfssl_la_SOURCES += wolfcrypt/src/cmac.c
|
||||
|
@@ -344,153 +344,6 @@ void wc_AesAsyncFree(Aes* aes)
|
||||
#ifdef HAVE_AES_DECRYPT
|
||||
#error nRF51 AES Hardware does not support decrypt
|
||||
#endif /* HAVE_AES_DECRYPT */
|
||||
#elif defined(WOLFSSL_ARMASM)
|
||||
/* Encrypt a single 16-byte block using the ARMv8 Crypto Extension.
 *
 * aes       context whose aes->key holds the expanded encryption key
 *           schedule and whose aes->rounds is 10, 12 or 14
 * inBlock   16 bytes to encrypt
 * outBlock  receives the 16 encrypted bytes
 *
 * Always returns 0.
 */
static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
    byte*  keyPt  = (byte*)aes->key;
    word32 rounds = aes->rounds;

    /*
      AESE  exor's input with round key
            shift rows of exor'ed result
            sub bytes for shifted rows
      AESMC mixes the columns of the result

      The first ten rounds are unrolled unconditionally; the round count is
      then checked to decide whether two or four more rounds are required
      before the final AddRoundKey.
     */

    __asm__ __volatile__ (
        "LD1 {v0.16b}, [%[CtrIn]]            \n"
        "LD1 {v1.16b-v4.16b}, [%[Key]], #64  \n"

        "AESE v0.16b, v1.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v2.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v3.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v4.16b  \n"
        "AESMC v0.16b, v0.16b \n"

        "LD1 {v1.16b-v4.16b}, [%[Key]], #64  \n"
        "AESE v0.16b, v1.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v2.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v3.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v4.16b  \n"
        "AESMC v0.16b, v0.16b \n"

        "LD1 {v1.16b-v2.16b}, [%[Key]], #32  \n"
        "AESE v0.16b, v1.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v2.16b  \n"

        /* subtract rounds done so far and see if should continue */
        "MOV w12, %w[R]    \n"
        "SUB w12, w12, #10 \n"
        "CBZ w12, 1f       \n"
        "LD1 {v1.16b-v2.16b}, [%[Key]], #32  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v1.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v2.16b  \n"

        "SUB w12, w12, #2 \n"
        "CBZ w12, 1f      \n"
        "LD1 {v1.16b-v2.16b}, [%[Key]], #32  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v1.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v2.16b  \n"

        /* Final AddRoundKey then store result. A numeric local label is
         * used so the asm can be emitted more than once (inlining or
         * cloning) without a duplicate symbol error. */
        "1: \n"
        "LD1 {v1.16b}, [%[Key]], #16 \n"
        "EOR v0.16b, v0.16b, v1.16b  \n"
        "ST1 {v0.16b}, [%[CtrOut]]   \n"

        /* every operand is tied in/out so each one gets its own register;
         * only Key is actually advanced by the post-indexed loads */
        :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (rounds),
         "=r" (inBlock)
        :"0" (outBlock), [Key] "1" (keyPt), [R] "2" (rounds),
         [CtrIn] "3" (inBlock)
        /* v0-v4 are overwritten above and must be declared as clobbered */
        : "cc", "memory", "w12", "v0", "v1", "v2", "v3", "v4"
    );

    return 0;
}
|
||||
#ifdef HAVE_AES_DECRYPT
|
||||
/* Decrypt a single 16-byte block using the ARMv8 Crypto Extension.
 *
 * aes       context whose aes->key holds the decryption key schedule
 *           (round keys in reverse order with the inverse MixColumns
 *           transform applied to the inner keys) and whose aes->rounds is
 *           10, 12 or 14
 * inBlock   16 bytes to decrypt
 * outBlock  receives the 16 decrypted bytes
 *
 * Always returns 0.
 */
static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
    byte*  keyPt  = (byte*)aes->key;
    word32 rounds = aes->rounds;

    /*
      AESD   exor's input with round key
             inverse shift rows of exor'ed result
             inverse sub bytes for shifted rows
      AESIMC applies inverse mix columns to the result

      The first ten rounds are unrolled unconditionally; the round count is
      then checked to decide whether two or four more rounds are required
      before the final AddRoundKey.
     */

    __asm__ __volatile__ (
        "LD1 {v0.16b}, [%[CtrIn]]            \n"
        "LD1 {v1.16b-v4.16b}, [%[Key]], #64  \n"

        "AESD v0.16b, v1.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v2.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v3.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v4.16b   \n"
        "AESIMC v0.16b, v0.16b \n"

        "LD1 {v1.16b-v4.16b}, [%[Key]], #64  \n"
        "AESD v0.16b, v1.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v2.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v3.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v4.16b   \n"
        "AESIMC v0.16b, v0.16b \n"

        "LD1 {v1.16b-v2.16b}, [%[Key]], #32  \n"
        "AESD v0.16b, v1.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v2.16b   \n"

        /* subtract rounds done so far and see if should continue */
        "MOV w12, %w[R]    \n"
        "SUB w12, w12, #10 \n"
        "CBZ w12, 1f       \n"
        "LD1 {v1.16b-v2.16b}, [%[Key]], #32  \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v1.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v2.16b   \n"

        "SUB w12, w12, #2 \n"
        "CBZ w12, 1f      \n"
        "LD1 {v1.16b-v2.16b}, [%[Key]], #32  \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v1.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v2.16b   \n"

        /* Final AddRoundKey then store result. A numeric local label is
         * used so the asm can be emitted more than once (inlining or
         * cloning) without a duplicate symbol error. */
        "1: \n"
        "LD1 {v1.16b}, [%[Key]], #16 \n"
        "EOR v0.16b, v0.16b, v1.16b  \n"
        /* store with the same byte-wise arrangement the block was loaded
         * with, so the result does not depend on data endianness */
        "ST1 {v0.16b}, [%[CtrOut]]   \n"

        /* every operand is tied in/out so each one gets its own register;
         * only Key is actually advanced by the post-indexed loads */
        :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (rounds),
         "=r" (inBlock)
        :"0" (outBlock), [Key] "1" (keyPt), [R] "2" (rounds),
         [CtrIn] "3" (inBlock)
        /* v0-v4 are overwritten above and must be declared as clobbered */
        : "cc", "memory", "w12", "v0", "v1", "v2", "v3", "v4"
    );

    return 0;
}
|
||||
#endif /* HAVE_AES_DECRYPT */
|
||||
#else
|
||||
|
||||
/* using wolfCrypt software AES implementation */
|
||||
@@ -1794,196 +1647,6 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
|
||||
{
|
||||
return wc_AesSetKey(aes, userKey, keylen, iv, dir);
|
||||
}
|
||||
#elif defined(WOLFSSL_ARMASM)
|
||||
static const byte rcon[] = {
|
||||
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,0x1B, 0x36
|
||||
/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
|
||||
};
|
||||
|
||||
|
||||
/* Similar to wolfSSL software implementation of expanding the AES key.
|
||||
* Changed out the locations of where table look ups where made to
|
||||
* use hardware instruction. Also altered decryption key to match. */
|
||||
int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
|
||||
const byte* iv, int dir)
|
||||
{
|
||||
word32 temp, *rk = aes->key;
|
||||
unsigned int i = 0;
|
||||
|
||||
#if defined(AES_MAX_KEY_SIZE)
|
||||
const word32 max_key_len = (AES_MAX_KEY_SIZE / 8);
|
||||
#endif
|
||||
|
||||
if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
|
||||
return BAD_FUNC_ARG;
|
||||
|
||||
#if defined(AES_MAX_KEY_SIZE)
|
||||
/* Check key length */
|
||||
if (keylen > max_key_len) {
|
||||
return BAD_FUNC_ARG;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef WOLFSSL_AES_COUNTER
|
||||
aes->left = 0;
|
||||
#endif /* WOLFSSL_AES_COUNTER */
|
||||
|
||||
aes->rounds = keylen/4 + 6;
|
||||
XMEMCPY(rk, userKey, keylen);
|
||||
|
||||
switch(keylen)
|
||||
{
|
||||
#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 128
|
||||
case 16:
|
||||
while (1)
|
||||
{
|
||||
temp = rk[3];
|
||||
|
||||
/* get table value from hardware */
|
||||
__asm__ volatile (
|
||||
"DUP v1.4s, %w[in] \n"
|
||||
"MOVI v0.16b, #0 \n"
|
||||
"AESE v0.16b, v1.16b \n"
|
||||
"UMOV %w[out], v0.4s[0] \n"
|
||||
: [out] "=r"(temp)
|
||||
: [in] "r" (temp)
|
||||
: "cc", "memory", "v0", "v1"
|
||||
);
|
||||
temp = rotrFixed(temp, 8);
|
||||
rk[4] = rk[0] ^ temp ^ rcon[i];
|
||||
rk[5] = rk[4] ^ rk[1];
|
||||
rk[6] = rk[5] ^ rk[2];
|
||||
rk[7] = rk[6] ^ rk[3];
|
||||
if (++i == 10)
|
||||
break;
|
||||
rk += 4;
|
||||
}
|
||||
break;
|
||||
#endif /* 128 */
|
||||
|
||||
#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 192
|
||||
case 24:
|
||||
/* for (;;) here triggers a bug in VC60 SP4 w/ Pro Pack */
|
||||
while (1)
|
||||
{
|
||||
temp = rk[5];
|
||||
|
||||
/* get table value from hardware */
|
||||
__asm__ volatile (
|
||||
"DUP v1.4s, %w[in] \n"
|
||||
"MOVI v0.16b, #0 \n"
|
||||
"AESE v0.16b, v1.16b \n"
|
||||
"UMOV %w[out], v0.4s[0] \n"
|
||||
: [out] "=r"(temp)
|
||||
: [in] "r" (temp)
|
||||
: "cc", "memory", "v0", "v1"
|
||||
);
|
||||
temp = rotrFixed(temp, 8);
|
||||
rk[ 6] = rk[ 0] ^ temp ^ rcon[i];
|
||||
rk[ 7] = rk[ 1] ^ rk[ 6];
|
||||
rk[ 8] = rk[ 2] ^ rk[ 7];
|
||||
rk[ 9] = rk[ 3] ^ rk[ 8];
|
||||
if (++i == 8)
|
||||
break;
|
||||
rk[10] = rk[ 4] ^ rk[ 9];
|
||||
rk[11] = rk[ 5] ^ rk[10];
|
||||
rk += 6;
|
||||
}
|
||||
break;
|
||||
#endif /* 192 */
|
||||
|
||||
#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 256
|
||||
case 32:
|
||||
while (1)
|
||||
{
|
||||
temp = rk[7];
|
||||
|
||||
/* get table value from hardware */
|
||||
__asm__ volatile (
|
||||
"DUP v1.4s, %w[in] \n"
|
||||
"MOVI v0.16b, #0 \n"
|
||||
"AESE v0.16b, v1.16b \n"
|
||||
"UMOV %w[out], v0.4s[0] \n"
|
||||
: [out] "=r"(temp)
|
||||
: [in] "r" (temp)
|
||||
: "cc", "memory", "v0", "v1"
|
||||
);
|
||||
temp = rotrFixed(temp, 8);
|
||||
rk[8] = rk[0] ^ temp ^ rcon[i];
|
||||
rk[ 9] = rk[ 1] ^ rk[ 8];
|
||||
rk[10] = rk[ 2] ^ rk[ 9];
|
||||
rk[11] = rk[ 3] ^ rk[10];
|
||||
if (++i == 7)
|
||||
break;
|
||||
temp = rk[11];
|
||||
|
||||
/* get table value from hardware */
|
||||
__asm__ volatile (
|
||||
"DUP v1.4s, %w[in] \n"
|
||||
"MOVI v0.16b, #0 \n"
|
||||
"AESE v0.16b, v1.16b \n"
|
||||
"UMOV %w[out], v0.4s[0] \n"
|
||||
: [out] "=r"(temp)
|
||||
: [in] "r" (temp)
|
||||
: "cc", "memory", "v0", "v1"
|
||||
);
|
||||
rk[12] = rk[ 4] ^ temp;
|
||||
rk[13] = rk[ 5] ^ rk[12];
|
||||
rk[14] = rk[ 6] ^ rk[13];
|
||||
rk[15] = rk[ 7] ^ rk[14];
|
||||
|
||||
rk += 8;
|
||||
}
|
||||
break;
|
||||
#endif /* 256 */
|
||||
|
||||
default:
|
||||
return BAD_FUNC_ARG;
|
||||
}
|
||||
|
||||
if (dir == AES_DECRYPTION)
|
||||
{
|
||||
#ifdef HAVE_AES_DECRYPT
|
||||
unsigned int j;
|
||||
rk = aes->key;
|
||||
|
||||
/* invert the order of the round keys: */
|
||||
for (i = 0, j = 4* aes->rounds; i < j; i += 4, j -= 4) {
|
||||
temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
|
||||
temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
|
||||
temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
|
||||
temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
|
||||
}
|
||||
/* apply the inverse MixColumn transform to all round keys but the
|
||||
first and the last: */
|
||||
for (i = 1; i < aes->rounds; i++) {
|
||||
rk += 4;
|
||||
__asm__ volatile (
|
||||
"LD1 {v0.16b}, [%[in]] \n"
|
||||
"AESIMC v0.16b, v0.16b \n"
|
||||
"ST1 {v0.16b}, [%[out]]\n"
|
||||
: [out] "=r" (rk)
|
||||
: [in] "0" (rk)
|
||||
: "cc", "memory", "v0"
|
||||
);
|
||||
}
|
||||
#else
|
||||
WOLFSSL_MSG("AES Decryption not compiled in");
|
||||
return BAD_FUNC_ARG;
|
||||
#endif /* HAVE_AES_DECRYPT */
|
||||
}
|
||||
|
||||
return wc_AesSetIV(aes, iv);
|
||||
}
|
||||
|
||||
#if defined(WOLFSSL_AES_DIRECT)
|
||||
/* Key setup entry point for the AES direct API.
 *
 * Forwards every argument unchanged to wc_AesSetKey() and hands back
 * whatever that call returns.
 */
int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
                       const byte* iv, int dir)
{
    int ret;

    ret = wc_AesSetKey(aes, userKey, keylen, iv, dir);

    return ret;
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
static int wc_AesSetKeyLocal(Aes* aes, const byte* userKey, word32 keylen,
|
||||
const byte* iv, int dir)
|
||||
@@ -3165,7 +2828,7 @@ static INLINE void IncrementGcmCounter(byte* inOutCtr)
|
||||
}
|
||||
|
||||
|
||||
#if defined(GCM_SMALL) || defined(GCM_TABLE) || defined(WOLFSSL_ARMASM)
|
||||
#if defined(GCM_SMALL) || defined(GCM_TABLE)
|
||||
|
||||
static INLINE void FlattenSzInBits(byte* buf, word32 sz)
|
||||
{
|
||||
@@ -3249,20 +2912,6 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
|
||||
|
||||
if (ret == 0) {
|
||||
wc_AesEncrypt(aes, iv, aes->H);
|
||||
#if defined(WOLFSSL_ARMASM) && defined(__aarch64__)
|
||||
{
|
||||
word32* pt = (word32*)aes->H;
|
||||
__asm__ volatile (
|
||||
"LD1 {v0.16b}, [%[h]] \n"
|
||||
"RBIT v0.16b, v0.16b \n"
|
||||
"ST1 {v0.16b}, [%[out]] \n"
|
||||
: [out] "=r" (pt)
|
||||
: [h] "0" (pt)
|
||||
: "cc", "memory"
|
||||
);
|
||||
return ret; /* no need to generate GCM_TABLE */
|
||||
}
|
||||
#endif
|
||||
#ifdef GCM_TABLE
|
||||
GenerateM0(aes);
|
||||
#endif /* GCM_TABLE */
|
||||
@@ -3699,118 +3348,7 @@ static int AES_GCM_decrypt(const unsigned char *in,
|
||||
#endif /* WOLFSSL_AESNI */
|
||||
|
||||
|
||||
#if defined(WOLFSSL_ARMASM) && defined(__aarch64__)
|
||||
/* PMULL and RBIT only with AArch64 */
|
||||
/* Use ARM hardware for polynomial multiply */
|
||||
/* Multiply X by Y with a carry-less multiply followed by a reduction,
 * using the AArch64 PMULL instructions, and write the product back to X.
 *
 * X  16-byte value; bit-reflected with RBIT on load, replaced by the
 *    product (reflected back) on return
 * Y  16-byte value that is used as loaded, i.e. it is expected to have
 *    been bit-reflected already by the caller
 */
static void GMULT(byte* X, byte* Y)
{
    __asm__ volatile (
        "LD1 {v0.16b}, [%[inX]] \n"
        "LD1 {v1.16b}, [%[inY]] \n" /* v1 already reflected from set key */
        "RBIT v0.16b, v0.16b \n"


        /* Algorithm 1 from Intel GCM white paper.
           "Carry-Less Multiplication and Its Usage for Computing the GCM Mode"
         */
        "PMULL  v3.1q, v0.1d, v1.1d \n"     /* a0 * b0 = C */
        "PMULL2 v4.1q, v0.2d, v1.2d \n"     /* a1 * b1 = D */
        "EXT v5.16b, v1.16b, v1.16b, #8 \n" /* b0b1 -> b1b0 */
        "PMULL  v6.1q, v0.1d, v5.1d \n"     /* a0 * b1 = E */
        "PMULL2 v5.1q, v0.2d, v5.2d \n"     /* a1 * b0 = F */

        /* Set a register to all 0s using EOR */
        "EOR v7.16b, v7.16b, v7.16b \n"
        "EOR v5.16b, v5.16b, v6.16b \n"     /* F ^ E */
        "EXT v6.16b, v7.16b, v5.16b, #8 \n" /* get (F^E)[0] */
        "EOR v3.16b, v3.16b, v6.16b \n"     /* low 128 bits in v3 */
        "EXT v6.16b, v5.16b, v7.16b, #8 \n" /* get (F^E)[1] */
        "EOR v4.16b, v4.16b, v6.16b \n"     /* high 128 bits in v4 */


        /* Based from White Paper "Implementing GCM on ARMv8"
           by Conrado P.L. Gouvea and Julio Lopez
           reduction on 256bit value using Algorithm 5 */
        "MOVI v8.16b, #0x87 \n"
        "USHR v8.2d, v8.2d, #56 \n"
        /* v8 is now 0x00000000000000870000000000000087 reflected 0xe1....*/
        "PMULL2 v5.1q, v4.2d, v8.2d \n"
        "EXT v6.16b, v5.16b, v7.16b, #8 \n" /* v7 is all 0's */
        "EOR v4.16b, v4.16b, v6.16b \n"
        "EXT v6.16b, v7.16b, v5.16b, #8 \n"
        "EOR v3.16b, v3.16b, v6.16b \n"
        "PMULL v5.1q, v4.1d, v8.1d  \n"
        "EOR v4.16b, v3.16b, v5.16b \n"

        "RBIT v4.16b, v4.16b \n"
        "STR q4, [%[out]] \n"
        /* the byte pointers are passed straight through; no wider pointer
         * type is needed since the asm only consumes the addresses */
        : [out] "=r" (X), "=r" (Y)
        : [inX] "0" (X), [inY] "1" (Y)
        /* v0 and v1 are loaded above and must be declared as clobbered
         * along with the working registers */
        : "cc", "memory", "v0", "v1", "v3", "v4", "v5", "v6", "v7", "v8"
    );
}
|
||||
|
||||
|
||||
/* Currently is a copy from GCM_SMALL wolfSSL version. Duplicated and set
|
||||
* seperate for future optimizations. */
|
||||
static void GHASH(Aes* aes, const byte* a, word32 aSz,
|
||||
const byte* c, word32 cSz, byte* s, word32 sSz)
|
||||
{
|
||||
byte x[AES_BLOCK_SIZE];
|
||||
byte scratch[AES_BLOCK_SIZE];
|
||||
word32 blocks, partial;
|
||||
byte* h = aes->H;
|
||||
|
||||
XMEMSET(x, 0, AES_BLOCK_SIZE);
|
||||
|
||||
/* Hash in A, the Additional Authentication Data */
|
||||
if (aSz != 0 && a != NULL) {
|
||||
blocks = aSz / AES_BLOCK_SIZE;
|
||||
partial = aSz % AES_BLOCK_SIZE;
|
||||
while (blocks--) {
|
||||
xorbuf(x, a, AES_BLOCK_SIZE);
|
||||
GMULT(x, h);
|
||||
a += AES_BLOCK_SIZE;
|
||||
}
|
||||
if (partial != 0) {
|
||||
XMEMSET(scratch, 0, AES_BLOCK_SIZE);
|
||||
XMEMCPY(scratch, a, partial);
|
||||
xorbuf(x, scratch, AES_BLOCK_SIZE);
|
||||
GMULT(x, h);
|
||||
}
|
||||
}
|
||||
|
||||
/* Hash in C, the Ciphertext */
|
||||
if (cSz != 0 && c != NULL) {
|
||||
blocks = cSz / AES_BLOCK_SIZE;
|
||||
partial = cSz % AES_BLOCK_SIZE;
|
||||
while (blocks--) {
|
||||
xorbuf(x, c, AES_BLOCK_SIZE);
|
||||
GMULT(x, h);
|
||||
c += AES_BLOCK_SIZE;
|
||||
}
|
||||
if (partial != 0) {
|
||||
XMEMSET(scratch, 0, AES_BLOCK_SIZE);
|
||||
XMEMCPY(scratch, c, partial);
|
||||
xorbuf(x, scratch, AES_BLOCK_SIZE);
|
||||
GMULT(x, h);
|
||||
}
|
||||
}
|
||||
|
||||
/* Hash in the lengths of A and C in bits */
|
||||
FlattenSzInBits(&scratch[0], aSz);
|
||||
FlattenSzInBits(&scratch[8], cSz);
|
||||
xorbuf(x, scratch, AES_BLOCK_SIZE);
|
||||
GMULT(x, h);
|
||||
|
||||
/* Copy the result into s. */
|
||||
XMEMCPY(s, x, sSz);
|
||||
}
|
||||
/* not using ARMASM for multiplication */
|
||||
#elif defined(GCM_SMALL)
|
||||
#if defined(GCM_SMALL)
|
||||
static void GMULT(byte* X, byte* Y)
|
||||
{
|
||||
byte Z[AES_BLOCK_SIZE];
|
||||
|
2151
wolfcrypt/src/port/arm/armv8-aes.c
Normal file
2151
wolfcrypt/src/port/arm/armv8-aes.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -69,6 +69,10 @@ int wolfCrypt_Init()
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
WOLFSSL_MSG("Using ARM hardware acceleration");
|
||||
#endif
|
||||
|
||||
initRefCount = 1;
|
||||
}
|
||||
|
||||
|
@@ -2815,6 +2815,7 @@ int aes_test(void)
|
||||
ret = wc_AesCbcEncrypt(&enc, bigCipher, bigMsg, msgSz);
|
||||
if (ret != 0)
|
||||
return -1032;
|
||||
|
||||
ret = wc_AesCbcDecrypt(&dec, bigPlain, bigCipher, msgSz);
|
||||
if (ret != 0)
|
||||
return -1033;
|
||||
@@ -2878,6 +2879,64 @@ int aes_test(void)
|
||||
0xc2
|
||||
};
|
||||
|
||||
|
||||
/* test vector from "Recommendation for Block Cipher Modes of Operation"
|
||||
* NIST Special Publication 800-38A */
|
||||
const byte ctr192Key[] =
|
||||
{
|
||||
0x8e,0x73,0xb0,0xf7,0xda,0x0e,0x64,0x52,
|
||||
0xc8,0x10,0xf3,0x2b,0x80,0x90,0x79,0xe5,
|
||||
0x62,0xf8,0xea,0xd2,0x52,0x2c,0x6b,0x7b
|
||||
};
|
||||
|
||||
const byte ctr192Iv[] =
|
||||
{
|
||||
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
|
||||
0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
|
||||
};
|
||||
|
||||
|
||||
const byte ctr192Plain[] =
|
||||
{
|
||||
0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
|
||||
0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a
|
||||
};
|
||||
|
||||
const byte ctr192Cipher[] =
|
||||
{
|
||||
0x1a,0xbc,0x93,0x24,0x17,0x52,0x1c,0xa2,
|
||||
0x4f,0x2b,0x04,0x59,0xfe,0x7e,0x6e,0x0b
|
||||
};
|
||||
|
||||
/* test vector from "Recommendation for Block Cipher Modes of Operation"
|
||||
* NIST Special Publication 800-38A */
|
||||
const byte ctr256Key[] =
|
||||
{
|
||||
0x60,0x3d,0xeb,0x10,0x15,0xca,0x71,0xbe,
|
||||
0x2b,0x73,0xae,0xf0,0x85,0x7d,0x77,0x81,
|
||||
0x1f,0x35,0x2c,0x07,0x3b,0x61,0x08,0xd7,
|
||||
0x2d,0x98,0x10,0xa3,0x09,0x14,0xdf,0xf4
|
||||
};
|
||||
|
||||
const byte ctr256Iv[] =
|
||||
{
|
||||
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
|
||||
0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
|
||||
};
|
||||
|
||||
|
||||
const byte ctr256Plain[] =
|
||||
{
|
||||
0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
|
||||
0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a
|
||||
};
|
||||
|
||||
const byte ctr256Cipher[] =
|
||||
{
|
||||
0x60,0x1e,0xc3,0x13,0x77,0x57,0x89,0xa5,
|
||||
0xb7,0xa7,0xf5,0x04,0xbb,0xf3,0xd2,0x28
|
||||
};
|
||||
|
||||
wc_AesSetKeyDirect(&enc, ctrKey, AES_BLOCK_SIZE, ctrIv, AES_ENCRYPTION);
|
||||
/* Ctr only uses encrypt, even on key setup */
|
||||
wc_AesSetKeyDirect(&dec, ctrKey, AES_BLOCK_SIZE, ctrIv, AES_ENCRYPTION);
|
||||
@@ -2914,6 +2973,40 @@ int aes_test(void)
|
||||
|
||||
if (XMEMCMP(cipher, oddCipher, 9))
|
||||
return -71;
|
||||
|
||||
/* 192 bit key */
|
||||
wc_AesSetKeyDirect(&enc, ctr192Key, sizeof(ctr192Key),
|
||||
ctr192Iv, AES_ENCRYPTION);
|
||||
/* Ctr only uses encrypt, even on key setup */
|
||||
wc_AesSetKeyDirect(&dec, ctr192Key, sizeof(ctr192Key),
|
||||
ctr192Iv, AES_ENCRYPTION);
|
||||
|
||||
XMEMSET(plain, 0, sizeof(plain));
|
||||
wc_AesCtrEncrypt(&enc, plain, ctr192Cipher, sizeof(ctr192Cipher));
|
||||
|
||||
if (XMEMCMP(plain, ctr192Plain, sizeof(ctr192Plain)))
|
||||
return -72;
|
||||
|
||||
wc_AesCtrEncrypt(&dec, cipher, ctr192Plain, sizeof(ctr192Plain));
|
||||
if (XMEMCMP(ctr192Cipher, cipher, sizeof(ctr192Cipher)))
|
||||
return -73;
|
||||
|
||||
/* 256 bit key */
|
||||
wc_AesSetKeyDirect(&enc, ctr256Key, sizeof(ctr256Key),
|
||||
ctr256Iv, AES_ENCRYPTION);
|
||||
/* Ctr only uses encrypt, even on key setup */
|
||||
wc_AesSetKeyDirect(&dec, ctr256Key, sizeof(ctr256Key),
|
||||
ctr256Iv, AES_ENCRYPTION);
|
||||
|
||||
XMEMSET(plain, 0, sizeof(plain));
|
||||
wc_AesCtrEncrypt(&enc, plain, ctr256Cipher, sizeof(ctr256Cipher));
|
||||
|
||||
if (XMEMCMP(plain, ctr256Plain, sizeof(ctr256Plain)))
|
||||
return -74;
|
||||
|
||||
wc_AesCtrEncrypt(&dec, cipher, ctr256Plain, sizeof(ctr256Plain));
|
||||
if (XMEMCMP(ctr256Cipher, cipher, sizeof(ctr256Cipher)))
|
||||
return -75;
|
||||
}
|
||||
#endif /* WOLFSSL_AES_COUNTER */
|
||||
|
||||
|
Reference in New Issue
Block a user