forked from wolfSSL/wolfssl
initial ARMv8 instructions
This commit is contained in:
@@ -344,7 +344,160 @@ void wc_AesAsyncFree(Aes* aes)
|
||||
#ifdef HAVE_AES_DECRYPT
|
||||
#error nRF51 AES Hardware does not support decrypt
|
||||
#endif /* HAVE_AES_DECRYPT */
|
||||
#elif defined(WOLFSSL_ARMASM)
|
||||
/* Encrypt one AES block using the ARMv8 Crypto Extensions.
 *
 * aes      initialized key schedule (aes->key, aes->rounds)
 * inBlock  16-byte plaintext input
 * outBlock 16-byte ciphertext output (may alias inBlock; result is staged
 *          in a local buffer and copied out)
 * returns  0 on success
 *
 * AESE performs AddRoundKey + SubBytes + ShiftRows in one instruction;
 * AESMC performs MixColumns.  The first 10 rounds are unrolled, then the
 * round count is checked to run the 2 (AES-192) or 4 (AES-256) extra
 * rounds before the final AddRoundKey.
 */
static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
    byte* keyPt = (byte*)aes->key;
    word32 rounds = aes->rounds;
    byte out[AES_BLOCK_SIZE];
    byte* output = out;
    byte* input = (byte*)inBlock;

    /*
      AESE exor's input with round key
      shift rows of exor'ed result
      sub bytes for shifted rows
    */
    __asm__ __volatile__ (
        "LD1 {v0.16b}, [%[CtrIn]], #16  \n"
        "LD1 {v1.16b-v4.16b}, [%[Key]], #64  \n"

        "AESE v0.16b, v1.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v2.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v3.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v4.16b  \n"
        "AESMC v0.16b, v0.16b \n"

        "LD1 {v1.16b-v4.16b}, [%[Key]], #64  \n"
        "AESE v0.16b, v1.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v2.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v3.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v4.16b  \n"
        "AESMC v0.16b, v0.16b \n"

        "LD1 {v1.16b-v2.16b}, [%[Key]], #32  \n"
        "AESE v0.16b, v1.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v2.16b  \n"

        "#subtract rounds done so far and see if should continue\n"
        "MOV w12, %w[R]    \n"
        "SUB w12, w12, #10 \n"
        /* use a numeric local label ("1f"/"1:") instead of a named one so
         * the asm can safely be emitted more than once per object file */
        "CBZ w12, 1f       \n"
        "LD1 {v1.16b-v2.16b}, [%[Key]], #32  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v1.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v2.16b  \n"

        "SUB w12, w12, #2 \n"
        "CBZ w12, 1f      \n"
        "LD1 {v1.16b-v2.16b}, [%[Key]], #32  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v1.16b  \n"
        "AESMC v0.16b, v0.16b \n"
        "AESE v0.16b, v2.16b  \n"

        "#Final AddRoundKey then store result \n"
        "1: \n"
        "LD1 {v1.16b}, [%[Key]], #16 \n"
        "EOR v0.16b, v0.16b, v1.16b  \n"
        "ST1 {v0.16b}, [%[CtrOut]]   \n"

        /* CtrIn is post-incremented by LD1, so it must be an in/out
         * operand; declaring it input-only (as before) lets the compiler
         * assume the register is unchanged. v0-v4 are written by the asm
         * and must appear in the clobber list. */
        :[CtrOut] "=r" (output), "=r" (keyPt), "=r" (rounds), "=r" (input)
        :[Key] "1" (keyPt), [R] "2" (rounds), [CtrIn] "3" (input),
         "0" (output)
        : "cc", "memory", "w12", "v0", "v1", "v2", "v3", "v4"
    );

    XMEMCPY(outBlock, out, AES_BLOCK_SIZE);

    return 0;
}
|
||||
#ifdef HAVE_AES_DECRYPT
|
||||
/* Decrypt one AES block using the ARMv8 Crypto Extensions.
 *
 * aes      initialized key schedule; wc_AesSetKey() has already inverted
 *          the round-key order and applied AESIMC for decryption
 * inBlock  16-byte ciphertext input
 * outBlock 16-byte plaintext output (may alias inBlock; result is staged
 *          in a local buffer and copied out)
 * returns  0 on success
 *
 * Mirror of wc_AesEncrypt using AESD (inverse SubBytes/ShiftRows +
 * AddRoundKey) and AESIMC (inverse MixColumns).
 */
static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
    byte* keyPt = (byte*)aes->key;
    word32 rounds = aes->rounds;
    byte out[AES_BLOCK_SIZE];
    byte* output = out;
    byte* input = (byte*)inBlock;

    /*
      AESE exor's input with round key
      shift rows of exor'ed result
      sub bytes for shifted rows
    */
    __asm__ __volatile__ (
        "LD1 {v0.16b}, [%[CtrIn]], #16  \n"
        "LD1 {v1.16b-v4.16b}, [%[Key]], #64  \n"

        "AESD v0.16b, v1.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v2.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v3.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v4.16b   \n"
        "AESIMC v0.16b, v0.16b \n"

        "LD1 {v1.16b-v4.16b}, [%[Key]], #64  \n"
        "AESD v0.16b, v1.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v2.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v3.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v4.16b   \n"
        "AESIMC v0.16b, v0.16b \n"

        "LD1 {v1.16b-v2.16b}, [%[Key]], #32  \n"
        "AESD v0.16b, v1.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v2.16b   \n"

        "#subtract rounds done so far and see if should continue\n"
        "MOV w12, %w[R]    \n"
        "SUB w12, w12, #10 \n"
        /* numeric local label instead of "finalDec" so the asm can be
         * emitted more than once per object file without symbol clashes */
        "CBZ w12, 1f       \n"
        "LD1 {v1.16b-v2.16b}, [%[Key]], #32  \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v1.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v2.16b   \n"

        "SUB w12, w12, #2 \n"
        "CBZ w12, 1f      \n"
        "LD1 {v1.16b-v2.16b}, [%[Key]], #32  \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v1.16b   \n"
        "AESIMC v0.16b, v0.16b \n"
        "AESD v0.16b, v2.16b   \n"

        "#Final AddRoundKey then store result \n"
        "1: \n"
        "LD1 {v1.16b}, [%[Key]], #16 \n"
        "EOR v0.16b, v0.16b, v1.16b  \n"
        /* store as .16b to match the .16b load above and the encrypt
         * path (was .4s; byte-identical on little-endian, but keeps the
         * element size consistent) */
        "ST1 {v0.16b}, [%[CtrOut]]   \n"

        /* v0-v4 are written by the asm and must be declared clobbered */
        :[CtrOut] "=r" (output), "=r" (keyPt), "=r" (rounds), "=r" (input)
        :[Key] "1" (keyPt), [R] "2" (rounds), [CtrIn] "3" (input),
         "0" (output)
        : "cc", "memory", "w12", "v0", "v1", "v2", "v3", "v4"
    );

    XMEMCPY(outBlock, out, AES_BLOCK_SIZE);

    return 0;
}
|
||||
#endif /* HAVE_AES_DECRYPT */
|
||||
#else
|
||||
|
||||
/* using wolfCrypt software AES implementation */
|
||||
@@ -1533,7 +1686,6 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
|
||||
}
|
||||
#endif /* HAVE_AES_DECRYPT */
|
||||
#endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT */
|
||||
|
||||
#endif /* NEED_AES_TABLES */
|
||||
|
||||
|
||||
@@ -1678,6 +1830,196 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
|
||||
{
|
||||
return wc_AesSetKey(aes, userKey, keylen, iv, dir);
|
||||
}
|
||||
#elif defined(WOLFSSL_ARMASM)
|
||||
/* AES key-schedule round constants (x^(i-1) in GF(2^8)).
 * For 128-bit blocks, Rijndael never uses more than 10 rcon values. */
static const byte rcon[] = {
    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,0x1B, 0x36
};
|
||||
|
||||
|
||||
/* Similar to wolfSSL software implementation of expanding the AES key.
|
||||
* Changed out the locations of where table look ups where made to
|
||||
* use hardware instruction. Also altered decryption key to match. */
|
||||
/* Expand an AES key into the round-key schedule, using the ARMv8 AESE
 * instruction as a hardware S-box in place of software table lookups.
 *
 * aes     AES context to fill (aes->key, aes->rounds)
 * userKey raw key material
 * keylen  key length in bytes: 16, 24, or 32
 * iv      optional IV, passed to wc_AesSetIV()
 * dir     AES_ENCRYPTION or AES_DECRYPTION; for decryption the schedule
 *         is additionally inverted and run through AESIMC
 * returns 0 on success, BAD_FUNC_ARG on invalid length/arguments,
 *         otherwise the result of wc_AesSetIV()
 */
int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
        const byte* iv, int dir)
{
    word32 temp, *rk = aes->key;
    unsigned int i = 0;

#if defined(AES_MAX_KEY_SIZE)
    const word32 max_key_len = (AES_MAX_KEY_SIZE / 8);  /* bits -> bytes */
#endif

    /* only 128/192/256-bit keys are valid */
    if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
        return BAD_FUNC_ARG;

#if defined(AES_MAX_KEY_SIZE)
    /* Check key length */
    if (keylen > max_key_len) {
        return BAD_FUNC_ARG;
    }
#endif

#ifdef WOLFSSL_AES_COUNTER
    aes->left = 0;  /* no leftover CTR key-stream bytes yet */
#endif /* WOLFSSL_AES_COUNTER */

    /* 10/12/14 rounds for 16/24/32-byte keys */
    aes->rounds = keylen/4 + 6;
    XMEMCPY(rk, userKey, keylen);

    switch(keylen)
    {
#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 128
    case 16:
        while (1)
        {
            temp = rk[3];

            /* get table value from hardware: AESE on a zeroed state with
             * temp replicated to every lane reduces to SubWord(temp) —
             * DUP makes all columns equal so ShiftRows has no effect, and
             * the zero state makes AddRoundKey a no-op */
            __asm__ volatile (
                "DUP v1.4s, %w[in]  \n"
                "MOVI v0.16b, #0    \n"
                "AESE v0.16b, v1.16b \n"
                "UMOV %w[out], v0.4s[0] \n"
                : [out] "=r"(temp)
                : [in] "r" (temp)
                : "cc", "memory", "v0", "v1"
            );
            /* rotate to complete RotWord(SubWord(temp)) */
            temp = rotrFixed(temp, 8);
            rk[4] = rk[0] ^ temp ^ rcon[i];
            rk[5] = rk[4] ^ rk[1];
            rk[6] = rk[5] ^ rk[2];
            rk[7] = rk[6] ^ rk[3];
            if (++i == 10)
                break;
            rk += 4;
        }
        break;
#endif /* 128 */

#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 192
    case 24:
        /* for (;;) here triggers a bug in VC60 SP4 w/ Pro Pack */
        while (1)
        {
            temp = rk[5];

            /* get table value from hardware (SubWord via AESE, see the
             * 128-bit case for how this works) */
            __asm__ volatile (
                "DUP v1.4s, %w[in]  \n"
                "MOVI v0.16b, #0    \n"
                "AESE v0.16b, v1.16b \n"
                "UMOV %w[out], v0.4s[0] \n"
                : [out] "=r"(temp)
                : [in] "r" (temp)
                : "cc", "memory", "v0", "v1"
            );
            temp = rotrFixed(temp, 8);
            rk[ 6] = rk[ 0] ^ temp ^ rcon[i];
            rk[ 7] = rk[ 1] ^ rk[ 6];
            rk[ 8] = rk[ 2] ^ rk[ 7];
            rk[ 9] = rk[ 3] ^ rk[ 8];
            if (++i == 8)
                break;
            rk[10] = rk[ 4] ^ rk[ 9];
            rk[11] = rk[ 5] ^ rk[10];
            rk += 6;
        }
        break;
#endif /* 192 */

#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 256
    case 32:
        while (1)
        {
            temp = rk[7];

            /* get table value from hardware (SubWord via AESE) */
            __asm__ volatile (
                "DUP v1.4s, %w[in]  \n"
                "MOVI v0.16b, #0    \n"
                "AESE v0.16b, v1.16b \n"
                "UMOV %w[out], v0.4s[0] \n"
                : [out] "=r"(temp)
                : [in] "r" (temp)
                : "cc", "memory", "v0", "v1"
            );
            temp = rotrFixed(temp, 8);
            rk[8] = rk[0] ^ temp ^ rcon[i];
            rk[ 9] = rk[ 1] ^ rk[ 8];
            rk[10] = rk[ 2] ^ rk[ 9];
            rk[11] = rk[ 3] ^ rk[10];
            if (++i == 7)
                break;
            temp = rk[11];

            /* get table value from hardware; AES-256 applies SubWord
             * WITHOUT RotWord on this second half-step, hence no
             * rotrFixed() here */
            __asm__ volatile (
                "DUP v1.4s, %w[in]  \n"
                "MOVI v0.16b, #0    \n"
                "AESE v0.16b, v1.16b \n"
                "UMOV %w[out], v0.4s[0] \n"
                : [out] "=r"(temp)
                : [in] "r" (temp)
                : "cc", "memory", "v0", "v1"
            );
            rk[12] = rk[ 4] ^ temp;
            rk[13] = rk[ 5] ^ rk[12];
            rk[14] = rk[ 6] ^ rk[13];
            rk[15] = rk[ 7] ^ rk[14];

            rk += 8;
        }
        break;
#endif /* 256 */

    default:
        return BAD_FUNC_ARG;
    }

    if (dir == AES_DECRYPTION)
    {
#ifdef HAVE_AES_DECRYPT
        unsigned int j;
        rk = aes->key;

        /* invert the order of the round keys: */
        for (i = 0, j = 4 * aes->rounds; i < j; i += 4, j -= 4) {
            temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
            temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
            temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
            temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
        }
        /* apply the inverse MixColumn transform to all round keys but the
           first and the last: */
        for (i = 1; i < aes->rounds; i++) {
            rk += 4;
            /* in-place AESIMC on one 16-byte round key */
            __asm__ volatile (
                "LD1 {v0.16b}, [%[in]] \n"
                "AESIMC v0.16b, v0.16b \n"
                "ST1 {v0.16b}, [%[out]]\n"
                : [out] "=r" (rk)
                : [in] "0" (rk)
                : "cc", "memory", "v0"
            );
        }
#else
        WOLFSSL_MSG("AES Decryption not compiled in");
        return BAD_FUNC_ARG;
#endif /* HAVE_AES_DECRYPT */
    }

    return wc_AesSetIV(aes, iv);
}
|
||||
|
||||
#if defined(WOLFSSL_AES_DIRECT)
|
||||
int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
|
||||
const byte* iv, int dir)
|
||||
{
|
||||
return wc_AesSetKey(aes, userKey, keylen, iv, dir);
|
||||
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
static int wc_AesSetKeyLocal(Aes* aes, const byte* userKey, word32 keylen,
|
||||
const byte* iv, int dir)
|
||||
@@ -2859,7 +3201,7 @@ static INLINE void IncrementGcmCounter(byte* inOutCtr)
|
||||
}
|
||||
|
||||
|
||||
#if defined(GCM_SMALL) || defined(GCM_TABLE)
|
||||
#if defined(GCM_SMALL) || defined(GCM_TABLE) || defined(WOLFSSL_ARMASM)
|
||||
|
||||
static INLINE void FlattenSzInBits(byte* buf, word32 sz)
|
||||
{
|
||||
@@ -2943,6 +3285,20 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
|
||||
|
||||
if (ret == 0) {
|
||||
wc_AesEncrypt(aes, iv, aes->H);
|
||||
#if defined(WOLFSSL_ARMASM) && defined(__aarch64__)
|
||||
{
|
||||
word32* pt = (word32*)aes->H;
|
||||
__asm__ volatile (
|
||||
"LD1 {v0.16b}, [%[h]] \n"
|
||||
"RBIT v0.16b, v0.16b \n"
|
||||
"ST1 {v0.16b}, [%[out]] \n"
|
||||
: [out] "=r" (pt)
|
||||
: [h] "0" (pt)
|
||||
: "cc", "memory"
|
||||
);
|
||||
return ret; /* no need to generate GCM_TABLE */
|
||||
}
|
||||
#endif
|
||||
#ifdef GCM_TABLE
|
||||
GenerateM0(aes);
|
||||
#endif /* GCM_TABLE */
|
||||
@@ -3379,8 +3735,118 @@ static int AES_GCM_decrypt(const unsigned char *in,
|
||||
#endif /* WOLFSSL_AESNI */
|
||||
|
||||
|
||||
#if defined(GCM_SMALL)
|
||||
#if defined(WOLFSSL_ARMASM) && defined(__aarch64__)
|
||||
/* PMULL and RBIT only with AArch64 */
|
||||
/* Use ARM hardware for polynomial multiply */
|
||||
/* Galois-field (GF(2^128)) multiply for GHASH using AArch64 PMULL/PMULL2.
 *
 * X  16-byte input/output block; overwritten with X * Y in GF(2^128)
 * Y  16-byte hash key H; already bit-reflected at set-key time
 *
 * X is bit-reflected on entry and the result reflected back on exit so
 * the carry-less multiply operates in the "reversed" representation.
 */
static void GMULT(byte* X, byte* Y)
{
    word32* Xpt = (word32*)X;
    word32* Ypt = (word32*)Y;

    __asm__ volatile (
        "LD1 {v0.16b}, [%[inX]] \n"
        "LD1 {v1.16b}, [%[inY]] \n" /* v1 already reflected from set key */
        "RBIT v0.16b, v0.16b \n"

        /* Algorithm 1 from Intel GCM white paper.
           "Carry-Less Multiplication and Its Usage for Computing the GCM
           Mode" */
        "PMULL  v3.1q, v0.1d, v1.1d \n"     /* a0 * b0 = C */
        "PMULL2 v4.1q, v0.2d, v1.2d \n"     /* a1 * b1 = D */
        "EXT v5.16b, v1.16b, v1.16b, #8 \n" /* b0b1 -> b1b0 */
        "PMULL  v6.1q, v0.1d, v5.1d \n"     /* a0 * b1 = E */
        "PMULL2 v5.1q, v0.2d, v5.2d \n"     /* a1 * b0 = F */

        "#Set a register to all 0s using EOR \n"
        "EOR v7.16b, v7.16b, v7.16b \n"
        "EOR v5.16b, v5.16b, v6.16b \n"     /* F ^ E */
        "EXT v6.16b, v7.16b, v5.16b, #8 \n" /* get (F^E)[0] */
        "EOR v3.16b, v3.16b, v6.16b \n"     /* low 128 bits in v3 */
        "EXT v6.16b, v5.16b, v7.16b, #8 \n" /* get (F^E)[1] */
        "EOR v4.16b, v4.16b, v6.16b \n"     /* high 128 bits in v4 */

        /* Based from White Paper "Implementing GCM on ARMv8"
           by Conrado P.L. Gouvea and Julio Lopez
           reduction on 256bit value using Algorithm 5 */
        "MOVI v8.16b, #0x87 \n"
        "USHR v8.2d, v8.2d, #56 \n"
        /* v8 is now 0x00000000000000870000000000000087 reflected 0xe1....*/
        "PMULL2 v5.1q, v4.2d, v8.2d \n"
        "EXT v6.16b, v5.16b, v7.16b, #8 \n" /* v7 is all 0's */
        "EOR v4.16b, v4.16b, v6.16b \n"
        "EXT v6.16b, v7.16b, v5.16b, #8 \n"
        "EOR v3.16b, v3.16b, v6.16b \n"
        "PMULL v5.1q, v4.1d, v8.1d \n"
        "EOR v4.16b, v3.16b, v5.16b \n"

        "RBIT v4.16b, v4.16b \n"
        "STR q4, [%[out]] \n"
        : [out] "=r" (Xpt), "=r" (Ypt)
        : [inX] "0" (Xpt), [inY] "1" (Ypt)
        /* v0 and v1 are loaded/modified by the asm (RBIT writes v0) and
         * were previously missing from the clobber list */
        : "cc", "memory", "v0", "v1", "v3", "v4", "v5", "v6", "v7", "v8"
    );
}
|
||||
|
||||
|
||||
/* Currently is a copy from GCM_SMALL wolfSSL version. Duplicated and set
|
||||
* seperate for future optimizations. */
|
||||
/* GHASH over the AAD (a) and ciphertext (c), writing sSz bytes of the
 * authentication tag into s.  Currently mirrors the GCM_SMALL software
 * version, kept separate so the ARM path can be optimized later.
 *
 * Processing order follows the GCM spec: full/partial AAD blocks, then
 * full/partial ciphertext blocks, then the bit lengths of both. */
static void GHASH(Aes* aes, const byte* a, word32 aSz,
        const byte* c, word32 cSz, byte* s, word32 sSz)
{
    byte hashState[AES_BLOCK_SIZE];
    byte padBlock[AES_BLOCK_SIZE];
    word32 fullBlocks, remainder;
    byte* hKey = aes->H;

    XMEMSET(hashState, 0, AES_BLOCK_SIZE);

    /* Fold in A, the Additional Authentication Data */
    if (a != NULL && aSz != 0) {
        fullBlocks = aSz / AES_BLOCK_SIZE;
        remainder  = aSz % AES_BLOCK_SIZE;
        for (; fullBlocks > 0; fullBlocks--) {
            xorbuf(hashState, a, AES_BLOCK_SIZE);
            GMULT(hashState, hKey);
            a += AES_BLOCK_SIZE;
        }
        if (remainder != 0) {
            /* zero-pad the tail block before folding it in */
            XMEMSET(padBlock, 0, AES_BLOCK_SIZE);
            XMEMCPY(padBlock, a, remainder);
            xorbuf(hashState, padBlock, AES_BLOCK_SIZE);
            GMULT(hashState, hKey);
        }
    }

    /* Fold in C, the Ciphertext */
    if (c != NULL && cSz != 0) {
        fullBlocks = cSz / AES_BLOCK_SIZE;
        remainder  = cSz % AES_BLOCK_SIZE;
        for (; fullBlocks > 0; fullBlocks--) {
            xorbuf(hashState, c, AES_BLOCK_SIZE);
            GMULT(hashState, hKey);
            c += AES_BLOCK_SIZE;
        }
        if (remainder != 0) {
            XMEMSET(padBlock, 0, AES_BLOCK_SIZE);
            XMEMCPY(padBlock, c, remainder);
            xorbuf(hashState, padBlock, AES_BLOCK_SIZE);
            GMULT(hashState, hKey);
        }
    }

    /* Fold in the lengths of A and C, expressed in bits */
    FlattenSzInBits(&padBlock[0], aSz);
    FlattenSzInBits(&padBlock[8], cSz);
    xorbuf(hashState, padBlock, AES_BLOCK_SIZE);
    GMULT(hashState, hKey);

    /* Emit the first sSz bytes of the result. */
    XMEMCPY(s, hashState, sSz);
}
|
||||
/* not using ARMASM for multiplication */
|
||||
#elif defined(GCM_SMALL)
|
||||
static void GMULT(byte* X, byte* Y)
|
||||
{
|
||||
byte Z[AES_BLOCK_SIZE];
|
||||
|
||||
Reference in New Issue
Block a user