Aarch64 ASM: Use CPU feature detection in more code paths

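AES-GCM streaming: fix GHASH_ONE_BLOCK to choose between the hardware and software implementations based on CPU feature information.
AES-GCM uses EOR3 (a SHA-3 extension instruction): split the assembly code so that it is only used when the CPU supports it.
Kyber uses SQRDMLSH: split the assembly code into variants with and without the instruction.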

Renamed the define WOLFSSL_AARCH64_NO_SQRMLSH to
WOLFSSL_AARCH64_NO_SQRDMLSH to match the name of the instruction.

Improved the formatting of constant data arrays in the inline assembly code.
This commit is contained in:
Sean Parkinson
2025-01-02 19:56:04 +10:00
parent 239b85c804
commit 7d3ee74a71
11 changed files with 16423 additions and 8024 deletions

View File

@ -3125,7 +3125,7 @@ then
AM_CPPFLAGS="$AM_CPPFLAGS+sm4"
fi
else
AM_CPPFLAGS="$AM_CPPFLAGS -mcpu=generic+crypto -DWOLFSSL_AARCH64_NO_SQRMLSH"
AM_CPPFLAGS="$AM_CPPFLAGS -mcpu=generic+crypto -DWOLFSSL_AARCH64_NO_SQRDMLSH"
fi
;;
esac

View File

@ -805,6 +805,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
aes->use_aes_hw_crypto = IS_AARCH64_AES(cpuid_flags);
#ifdef HAVE_AESGCM
aes->use_pmull_hw_crypto = IS_AARCH64_PMULL(cpuid_flags);
aes->use_sha3_hw_crypto = IS_AARCH64_SHA3(cpuid_flags);
#endif
}
@ -6448,6 +6449,22 @@ static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz)
#define AES_LASTGBLOCK(aes) ((aes)->streamData + 3 * WC_AES_BLOCK_SIZE)
/* Access last encrypted block. */
#define AES_LASTBLOCK(aes) ((aes)->streamData + 4 * WC_AES_BLOCK_SIZE)
#if defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \
    !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
/* GHASH one block of data.
 *
 * Chooses the implementation at runtime from the CPU feature flags stored in
 * the AES object: GHASH_ONE_BLOCK_AARCH64 is used only when both
 * use_aes_hw_crypto and use_pmull_hw_crypto are set, otherwise
 * GHASH_ONE_BLOCK_SW is used.
 *
 * The aes argument is parenthesized before dereferencing so that any pointer
 * expression can be passed safely.
 *
 * @param [in, out] aes    AES GCM object.
 * @param [in]      block  Block of AAD or cipher text.
 */
#define GHASH_ONE_BLOCK(aes, block)                                         \
    do {                                                                    \
        if ((aes)->use_aes_hw_crypto && (aes)->use_pmull_hw_crypto) {       \
            GHASH_ONE_BLOCK_AARCH64(aes, block);                            \
        }                                                                   \
        else {                                                              \
            GHASH_ONE_BLOCK_SW(aes, block);                                 \
        }                                                                   \
    }                                                                       \
    while (0)
#else
/* Not an AArch64 hardware crypto build: always use the software version. */
#define GHASH_ONE_BLOCK GHASH_ONE_BLOCK_SW
#endif
#endif
#if defined(HAVE_COLDFIRE_SEC)
@ -6866,7 +6883,7 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,
* @param [in, out] aes AES GCM object.
* @param [in] block Block of AAD or cipher text.
*/
#define GHASH_ONE_BLOCK(aes, block) \
#define GHASH_ONE_BLOCK_SW(aes, block) \
do { \
xorbuf(AES_TAG(aes), block, WC_AES_BLOCK_SIZE); \
GMULT(AES_TAG(aes), aes->gcm.H); \
@ -7099,7 +7116,7 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,
* @param [in, out] aes AES GCM object.
* @param [in] block Block of AAD or cipher text.
*/
#define GHASH_ONE_BLOCK(aes, block) \
#define GHASH_ONE_BLOCK_SW(aes, block) \
do { \
xorbuf(AES_TAG(aes), block, WC_AES_BLOCK_SIZE); \
GMULT(AES_TAG(aes), aes->gcm.M0); \
@ -7392,8 +7409,6 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,
*/
#define GHASH_INIT_EXTRA(aes) WC_DO_NOTHING
#if !defined(__aarch64__) || !defined(WOLFSSL_ARMASM) || \
defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
/* GHASH one block of data..
*
* XOR block into tag and GMULT with H using pre-computed table.
@ -7401,13 +7416,12 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,
* @param [in, out] aes AES GCM object.
* @param [in] block Block of AAD or cipher text.
*/
#define GHASH_ONE_BLOCK(aes, block) \
#define GHASH_ONE_BLOCK_SW(aes, block) \
do { \
xorbuf(AES_TAG(aes), block, WC_AES_BLOCK_SIZE); \
GMULT(AES_TAG(aes), (aes)->gcm.M0); \
} \
while (0)
#endif
#endif /* WOLFSSL_AESGCM_STREAM */
#elif defined(WORD64_AVAILABLE) && !defined(GCM_WORD32)
@ -7574,7 +7588,7 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,
* @param [in, out] aes AES GCM object.
* @param [in] block Block of AAD or cipher text.
*/
#define GHASH_ONE_BLOCK(aes, block) \
#define GHASH_ONE_BLOCK_SW(aes, block) \
do { \
word64* x = (word64*)AES_TAG(aes); \
word64* h = (word64*)aes->gcm.H; \
@ -7652,7 +7666,7 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,
* @param [in, out] aes AES GCM object.
* @param [in] block Block of AAD or cipher text.
*/
#define GHASH_ONE_BLOCK(aes, block) \
#define GHASH_ONE_BLOCK_SW(aes, block) \
do { \
word64* x = (word64*)AES_TAG(aes); \
word64* h = (word64*)aes->gcm.H; \
@ -7884,7 +7898,7 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,
* @param [in, out] aes AES GCM object.
* @param [in] block Block of AAD or cipher text.
*/
#define GHASH_ONE_BLOCK(aes, block) \
#define GHASH_ONE_BLOCK_SW(aes, block) \
do { \
word32* x = (word32*)AES_TAG(aes); \
word32* h = (word32*)aes->gcm.H; \
@ -7936,7 +7950,7 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,
* @param [in, out] aes AES GCM object.
* @param [in] block Block of AAD or cipher text.
*/
#define GHASH_ONE_BLOCK(aes, block) \
#define GHASH_ONE_BLOCK_SW(aes, block) \
do { \
word32* x = (word32*)AES_TAG(aes); \
word32* h = (word32*)aes->gcm.H; \
@ -8139,7 +8153,8 @@ static void GHASH_FINAL(Aes* aes, byte* s, word32 sSz)
}
if (over > 0) {
/* Zeroize the unused part of the block. */
XMEMSET(AES_LASTGBLOCK(aes) + over, 0, (size_t)WC_AES_BLOCK_SIZE - over);
XMEMSET(AES_LASTGBLOCK(aes) + over, 0,
(size_t)WC_AES_BLOCK_SIZE - over);
/* Hash the last block of cipher text. */
GHASH_ONE_BLOCK(aes, AES_LASTGBLOCK(aes));
}
@ -10189,7 +10204,7 @@ int wc_AesGcmInit(Aes* aes, const byte* key, word32 len, const byte* iv,
else
#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \
!defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
if (aes->use_aes_hw_crypto) {
if (aes->use_aes_hw_crypto && aes->use_pmull_hw_crypto) {
AES_GCM_init_AARCH64(aes, iv, ivSz);
/* Reset state fields. */
@ -10328,7 +10343,7 @@ int wc_AesGcmEncryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz,
else
#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \
!defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
if (aes->use_aes_hw_crypto) {
if (aes->use_aes_hw_crypto && aes->use_pmull_hw_crypto) {
AES_GCM_crypt_update_AARCH64(aes, out, in, sz);
GHASH_UPDATE_AARCH64(aes, authIn, authInSz, out, sz);
}
@ -10388,7 +10403,7 @@ int wc_AesGcmEncryptFinal(Aes* aes, byte* authTag, word32 authTagSz)
else
#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \
!defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
if (aes->use_aes_hw_crypto) {
if (aes->use_aes_hw_crypto && aes->use_pmull_hw_crypto) {
AES_GCM_final_AARCH64(aes, authTag, authTagSz);
}
else
@ -10477,7 +10492,7 @@ int wc_AesGcmDecryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz,
else
#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \
!defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
if (aes->use_aes_hw_crypto) {
if (aes->use_aes_hw_crypto && aes->use_pmull_hw_crypto) {
GHASH_UPDATE_AARCH64(aes, authIn, authInSz, in, sz);
AES_GCM_crypt_update_AARCH64(aes, out, in, sz);
}
@ -10535,7 +10550,7 @@ int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, word32 authTagSz)
else
#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \
!defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
if (aes->use_aes_hw_crypto) {
if (aes->use_aes_hw_crypto && aes->use_pmull_hw_crypto) {
ALIGN32 byte calcTag[WC_AES_BLOCK_SIZE];
AES_GCM_final_AARCH64(aes, calcTag, authTagSz);
/* Check calculated tag matches the one passed in. */

View File

@ -289,7 +289,7 @@
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
cpuid_flags |= CPUID_SHA512;
#endif
#ifndef WOLFSSL_AARCH64_NO_SQRMLSH
#ifndef WOLFSSL_AARCH64_NO_SQRDMLSH
cpuid_flags |= CPUID_RDM;
#endif
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -37,30 +37,18 @@
#ifdef WOLFSSL_SHA3
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
static const word64 L_SHA3_transform_crypto_r[] = {
0x1UL,
0x8082UL,
0x800000000000808aUL,
0x8000000080008000UL,
0x808bUL,
0x80000001UL,
0x8000000080008081UL,
0x8000000000008009UL,
0x8aUL,
0x88UL,
0x80008009UL,
0x8000000aUL,
0x8000808bUL,
0x800000000000008bUL,
0x8000000000008089UL,
0x8000000000008003UL,
0x8000000000008002UL,
0x8000000000000080UL,
0x800aUL,
0x800000008000000aUL,
0x8000000080008081UL,
0x8000000000008080UL,
0x80000001UL,
0x8000000080008008UL,
0x0000000000000001, 0x0000000000008082,
0x800000000000808a, 0x8000000080008000,
0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009,
0x000000000000008a, 0x0000000000000088,
0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b,
0x8000000000008089, 0x8000000000008003,
0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a,
0x8000000080008081, 0x8000000000008080,
0x0000000080000001, 0x8000000080008008,
};
void BlockSha3_crypto(word64* state)
@ -183,30 +171,18 @@ void BlockSha3_crypto(word64* state)
#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */
static const word64 L_SHA3_transform_base_r[] = {
0x1UL,
0x8082UL,
0x800000000000808aUL,
0x8000000080008000UL,
0x808bUL,
0x80000001UL,
0x8000000080008081UL,
0x8000000000008009UL,
0x8aUL,
0x88UL,
0x80008009UL,
0x8000000aUL,
0x8000808bUL,
0x800000000000008bUL,
0x8000000000008089UL,
0x8000000000008003UL,
0x8000000000008002UL,
0x8000000000000080UL,
0x800aUL,
0x800000008000000aUL,
0x8000000080008081UL,
0x8000000000008080UL,
0x80000001UL,
0x8000000080008008UL,
0x0000000000000001, 0x0000000000008082,
0x800000000000808a, 0x8000000080008000,
0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009,
0x000000000000008a, 0x0000000000000088,
0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b,
0x8000000000008089, 0x8000000000008003,
0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a,
0x8000000080008081, 0x8000000000008080,
0x0000000080000001, 0x8000000080008008,
};
void BlockSha3_base(word64* state)

View File

@ -36,91 +36,50 @@
#ifdef WOLFSSL_SHA512
static const word64 L_SHA512_transform_neon_len_k[] = {
0x428a2f98d728ae22UL,
0x7137449123ef65cdUL,
0xb5c0fbcfec4d3b2fUL,
0xe9b5dba58189dbbcUL,
0x3956c25bf348b538UL,
0x59f111f1b605d019UL,
0x923f82a4af194f9bUL,
0xab1c5ed5da6d8118UL,
0xd807aa98a3030242UL,
0x12835b0145706fbeUL,
0x243185be4ee4b28cUL,
0x550c7dc3d5ffb4e2UL,
0x72be5d74f27b896fUL,
0x80deb1fe3b1696b1UL,
0x9bdc06a725c71235UL,
0xc19bf174cf692694UL,
0xe49b69c19ef14ad2UL,
0xefbe4786384f25e3UL,
0xfc19dc68b8cd5b5UL,
0x240ca1cc77ac9c65UL,
0x2de92c6f592b0275UL,
0x4a7484aa6ea6e483UL,
0x5cb0a9dcbd41fbd4UL,
0x76f988da831153b5UL,
0x983e5152ee66dfabUL,
0xa831c66d2db43210UL,
0xb00327c898fb213fUL,
0xbf597fc7beef0ee4UL,
0xc6e00bf33da88fc2UL,
0xd5a79147930aa725UL,
0x6ca6351e003826fUL,
0x142929670a0e6e70UL,
0x27b70a8546d22ffcUL,
0x2e1b21385c26c926UL,
0x4d2c6dfc5ac42aedUL,
0x53380d139d95b3dfUL,
0x650a73548baf63deUL,
0x766a0abb3c77b2a8UL,
0x81c2c92e47edaee6UL,
0x92722c851482353bUL,
0xa2bfe8a14cf10364UL,
0xa81a664bbc423001UL,
0xc24b8b70d0f89791UL,
0xc76c51a30654be30UL,
0xd192e819d6ef5218UL,
0xd69906245565a910UL,
0xf40e35855771202aUL,
0x106aa07032bbd1b8UL,
0x19a4c116b8d2d0c8UL,
0x1e376c085141ab53UL,
0x2748774cdf8eeb99UL,
0x34b0bcb5e19b48a8UL,
0x391c0cb3c5c95a63UL,
0x4ed8aa4ae3418acbUL,
0x5b9cca4f7763e373UL,
0x682e6ff3d6b2b8a3UL,
0x748f82ee5defb2fcUL,
0x78a5636f43172f60UL,
0x84c87814a1f0ab72UL,
0x8cc702081a6439ecUL,
0x90befffa23631e28UL,
0xa4506cebde82bde9UL,
0xbef9a3f7b2c67915UL,
0xc67178f2e372532bUL,
0xca273eceea26619cUL,
0xd186b8c721c0c207UL,
0xeada7dd6cde0eb1eUL,
0xf57d4f7fee6ed178UL,
0x6f067aa72176fbaUL,
0xa637dc5a2c898a6UL,
0x113f9804bef90daeUL,
0x1b710b35131c471bUL,
0x28db77f523047d84UL,
0x32caab7b40c72493UL,
0x3c9ebe0a15c9bebcUL,
0x431d67c49c100d4cUL,
0x4cc5d4becb3e42b6UL,
0x597f299cfc657e2aUL,
0x5fcb6fab3ad6faecUL,
0x6c44198c4a475817UL,
0x428a2f98d728ae22, 0x7137449123ef65cd,
0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
0x3956c25bf348b538, 0x59f111f1b605d019,
0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
0xd807aa98a3030242, 0x12835b0145706fbe,
0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
0x9bdc06a725c71235, 0xc19bf174cf692694,
0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
0x983e5152ee66dfab, 0xa831c66d2db43210,
0xb00327c898fb213f, 0xbf597fc7beef0ee4,
0xc6e00bf33da88fc2, 0xd5a79147930aa725,
0x06ca6351e003826f, 0x142929670a0e6e70,
0x27b70a8546d22ffc, 0x2e1b21385c26c926,
0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
0x650a73548baf63de, 0x766a0abb3c77b2a8,
0x81c2c92e47edaee6, 0x92722c851482353b,
0xa2bfe8a14cf10364, 0xa81a664bbc423001,
0xc24b8b70d0f89791, 0xc76c51a30654be30,
0xd192e819d6ef5218, 0xd69906245565a910,
0xf40e35855771202a, 0x106aa07032bbd1b8,
0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
0x748f82ee5defb2fc, 0x78a5636f43172f60,
0x84c87814a1f0ab72, 0x8cc702081a6439ec,
0x90befffa23631e28, 0xa4506cebde82bde9,
0xbef9a3f7b2c67915, 0xc67178f2e372532b,
0xca273eceea26619c, 0xd186b8c721c0c207,
0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
0x06f067aa72176fba, 0x0a637dc5a2c898a6,
0x113f9804bef90dae, 0x1b710b35131c471b,
0x28db77f523047d84, 0x32caab7b40c72493,
0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
};
static const word64 L_SHA512_transform_neon_len_ror8[] = {
0x7060504030201UL,
0x80f0e0d0c0b0a09UL,
0x0007060504030201, 0x080f0e0d0c0b0a09,
};
void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len)
@ -1054,86 +1013,46 @@ void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len)
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
static const word64 L_SHA512_transform_crypto_len_k[] = {
0x428a2f98d728ae22UL,
0x7137449123ef65cdUL,
0xb5c0fbcfec4d3b2fUL,
0xe9b5dba58189dbbcUL,
0x3956c25bf348b538UL,
0x59f111f1b605d019UL,
0x923f82a4af194f9bUL,
0xab1c5ed5da6d8118UL,
0xd807aa98a3030242UL,
0x12835b0145706fbeUL,
0x243185be4ee4b28cUL,
0x550c7dc3d5ffb4e2UL,
0x72be5d74f27b896fUL,
0x80deb1fe3b1696b1UL,
0x9bdc06a725c71235UL,
0xc19bf174cf692694UL,
0xe49b69c19ef14ad2UL,
0xefbe4786384f25e3UL,
0xfc19dc68b8cd5b5UL,
0x240ca1cc77ac9c65UL,
0x2de92c6f592b0275UL,
0x4a7484aa6ea6e483UL,
0x5cb0a9dcbd41fbd4UL,
0x76f988da831153b5UL,
0x983e5152ee66dfabUL,
0xa831c66d2db43210UL,
0xb00327c898fb213fUL,
0xbf597fc7beef0ee4UL,
0xc6e00bf33da88fc2UL,
0xd5a79147930aa725UL,
0x6ca6351e003826fUL,
0x142929670a0e6e70UL,
0x27b70a8546d22ffcUL,
0x2e1b21385c26c926UL,
0x4d2c6dfc5ac42aedUL,
0x53380d139d95b3dfUL,
0x650a73548baf63deUL,
0x766a0abb3c77b2a8UL,
0x81c2c92e47edaee6UL,
0x92722c851482353bUL,
0xa2bfe8a14cf10364UL,
0xa81a664bbc423001UL,
0xc24b8b70d0f89791UL,
0xc76c51a30654be30UL,
0xd192e819d6ef5218UL,
0xd69906245565a910UL,
0xf40e35855771202aUL,
0x106aa07032bbd1b8UL,
0x19a4c116b8d2d0c8UL,
0x1e376c085141ab53UL,
0x2748774cdf8eeb99UL,
0x34b0bcb5e19b48a8UL,
0x391c0cb3c5c95a63UL,
0x4ed8aa4ae3418acbUL,
0x5b9cca4f7763e373UL,
0x682e6ff3d6b2b8a3UL,
0x748f82ee5defb2fcUL,
0x78a5636f43172f60UL,
0x84c87814a1f0ab72UL,
0x8cc702081a6439ecUL,
0x90befffa23631e28UL,
0xa4506cebde82bde9UL,
0xbef9a3f7b2c67915UL,
0xc67178f2e372532bUL,
0xca273eceea26619cUL,
0xd186b8c721c0c207UL,
0xeada7dd6cde0eb1eUL,
0xf57d4f7fee6ed178UL,
0x6f067aa72176fbaUL,
0xa637dc5a2c898a6UL,
0x113f9804bef90daeUL,
0x1b710b35131c471bUL,
0x28db77f523047d84UL,
0x32caab7b40c72493UL,
0x3c9ebe0a15c9bebcUL,
0x431d67c49c100d4cUL,
0x4cc5d4becb3e42b6UL,
0x597f299cfc657e2aUL,
0x5fcb6fab3ad6faecUL,
0x6c44198c4a475817UL,
0x428a2f98d728ae22, 0x7137449123ef65cd,
0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
0x3956c25bf348b538, 0x59f111f1b605d019,
0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
0xd807aa98a3030242, 0x12835b0145706fbe,
0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
0x9bdc06a725c71235, 0xc19bf174cf692694,
0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
0x983e5152ee66dfab, 0xa831c66d2db43210,
0xb00327c898fb213f, 0xbf597fc7beef0ee4,
0xc6e00bf33da88fc2, 0xd5a79147930aa725,
0x06ca6351e003826f, 0x142929670a0e6e70,
0x27b70a8546d22ffc, 0x2e1b21385c26c926,
0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
0x650a73548baf63de, 0x766a0abb3c77b2a8,
0x81c2c92e47edaee6, 0x92722c851482353b,
0xa2bfe8a14cf10364, 0xa81a664bbc423001,
0xc24b8b70d0f89791, 0xc76c51a30654be30,
0xd192e819d6ef5218, 0xd69906245565a910,
0xf40e35855771202a, 0x106aa07032bbd1b8,
0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
0x748f82ee5defb2fc, 0x78a5636f43172f60,
0x84c87814a1f0ab72, 0x8cc702081a6439ec,
0x90befffa23631e28, 0xa4506cebde82bde9,
0xbef9a3f7b2c67915, 0xc67178f2e372532b,
0xca273eceea26619c, 0xd186b8c721c0c207,
0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
0x06f067aa72176fba, 0x0a637dc5a2c898a6,
0x113f9804bef90dae, 0x1b710b35131c471b,
0x28db77f523047d84, 0x32caab7b40c72493,
0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
};
void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, word32 len);

View File

@ -84,7 +84,8 @@
/* Declared in wc_kyber.c to stop compiler optimizer from simplifying. */
extern volatile sword16 kyber_opt_blocker;
#ifdef USE_INTEL_SPEEDUP
#if defined(USE_INTEL_SPEEDUP) || (defined(__aarch64__) && \
defined(WOLFSSL_ARMASM))
static word32 cpuid_flags = 0;
#endif
@ -1099,7 +1100,8 @@ static void kyber_pointwise_acc_mont(sword16* r, const sword16* a,
*/
void kyber_init(void)
{
#ifdef USE_INTEL_SPEEDUP
#if defined(USE_INTEL_SPEEDUP) || (defined(__aarch64__) && \
defined(WOLFSSL_ARMASM))
cpuid_flags = cpuid_get_flags();
#endif
}
@ -1121,7 +1123,32 @@ void kyber_keygen(sword16* priv, sword16* pub, sword16* e, const sword16* a,
{
int i;
/* Transform private key. All of result used in public key calculation */
#ifndef WOLFSSL_AARCH64_NO_SQRDMLSH
if (IS_AARCH64_RDM(cpuid_flags)) {
/* Transform private key. All of result used in public key calculation.
*/
for (i = 0; i < kp; ++i) {
kyber_ntt_sqrdmlsh(priv + i * KYBER_N);
}
/* For each polynomial in the vectors. */
for (i = 0; i < kp; ++i) {
/* Multiply a by private into public polynomial. */
kyber_pointwise_acc_mont(pub + i * KYBER_N, a + i * kp * KYBER_N,
priv, kp);
/* Convert public polynomial to Montgomery form. */
kyber_to_mont_sqrdmlsh(pub + i * KYBER_N);
/* Transform error values polynomial. */
kyber_ntt_sqrdmlsh(e + i * KYBER_N);
/* Add errors to public key and reduce. */
kyber_add_reduce(pub + i * KYBER_N, e + i * KYBER_N);
}
}
else
#endif
{
/* Transform private key. All of result used in public key calculation.
*/
for (i = 0; i < kp; ++i) {
kyber_ntt(priv + i * KYBER_N);
}
@ -1129,8 +1156,8 @@ void kyber_keygen(sword16* priv, sword16* pub, sword16* e, const sword16* a,
/* For each polynomial in the vectors. */
for (i = 0; i < kp; ++i) {
/* Multiply a by private into public polynomial. */
kyber_pointwise_acc_mont(pub + i * KYBER_N, a + i * kp * KYBER_N, priv,
kp);
kyber_pointwise_acc_mont(pub + i * KYBER_N, a + i * kp * KYBER_N,
priv, kp);
/* Convert public polynomial to Montgomery form. */
kyber_to_mont(pub + i * KYBER_N);
/* Transform error values polynomial. */
@ -1139,6 +1166,7 @@ void kyber_keygen(sword16* priv, sword16* pub, sword16* e, const sword16* a,
kyber_add_reduce(pub + i * KYBER_N, e + i * KYBER_N);
}
}
}
/* Encapsulate message.
*
@ -1158,6 +1186,32 @@ void kyber_encapsulate(const sword16* pub, sword16* bp, sword16* v,
{
int i;
#ifndef WOLFSSL_AARCH64_NO_SQRDMLSH
if (IS_AARCH64_RDM(cpuid_flags)) {
/* Transform sp. All of result used in calculation of bp and v. */
for (i = 0; i < kp; ++i) {
kyber_ntt_sqrdmlsh(sp + i * KYBER_N);
}
/* For each polynomial in the vectors. */
for (i = 0; i < kp; ++i) {
/* Multiply at by sp into bp polynomial. */
kyber_pointwise_acc_mont(bp + i * KYBER_N, at + i * kp * KYBER_N,
sp, kp);
/* Inverse transform bp polynomial. */
kyber_invntt_sqrdmlsh(bp + i * KYBER_N);
/* Add errors to bp and reduce. */
kyber_add_reduce(bp + i * KYBER_N, ep + i * KYBER_N);
}
/* Multiply public key by sp into v polynomial. */
kyber_pointwise_acc_mont(v, pub, sp, kp);
/* Inverse transform v. */
kyber_invntt_sqrdmlsh(v);
}
else
#endif
{
/* Transform sp. All of result used in calculation of bp and v. */
for (i = 0; i < kp; ++i) {
kyber_ntt(sp + i * KYBER_N);
@ -1166,8 +1220,8 @@ void kyber_encapsulate(const sword16* pub, sword16* bp, sword16* v,
/* For each polynomial in the vectors. */
for (i = 0; i < kp; ++i) {
/* Multiply at by sp into bp polynomial. */
kyber_pointwise_acc_mont(bp + i * KYBER_N, at + i * kp * KYBER_N, sp,
kp);
kyber_pointwise_acc_mont(bp + i * KYBER_N, at + i * kp * KYBER_N,
sp, kp);
/* Inverse transform bp polynomial. */
kyber_invntt(bp + i * KYBER_N);
/* Add errors to bp and reduce. */
@ -1178,6 +1232,7 @@ void kyber_encapsulate(const sword16* pub, sword16* bp, sword16* v,
kyber_pointwise_acc_mont(v, pub, sp, kp);
/* Inverse transform v. */
kyber_invntt(v);
}
/* Add errors and message to v and reduce. */
kyber_add3_reduce(v, epp, m);
}
@ -1195,6 +1250,21 @@ void kyber_decapsulate(const sword16* priv, sword16* mp, sword16* bp,
{
int i;
#ifndef WOLFSSL_AARCH64_NO_SQRDMLSH
if (IS_AARCH64_RDM(cpuid_flags)) {
/* Transform bp. All of result used in calculation of mp. */
for (i = 0; i < kp; ++i) {
kyber_ntt_sqrdmlsh(bp + i * KYBER_N);
}
/* Multiply private key by bp into mp polynomial. */
kyber_pointwise_acc_mont(mp, priv, bp, kp);
/* Inverse transform mp. */
kyber_invntt_sqrdmlsh(mp);
}
else
#endif
{
/* Transform bp. All of result used in calculation of mp. */
for (i = 0; i < kp; ++i) {
kyber_ntt(bp + i * KYBER_N);
@ -1204,6 +1274,7 @@ void kyber_decapsulate(const sword16* priv, sword16* mp, sword16* bp,
kyber_pointwise_acc_mont(mp, priv, bp, kp);
/* Inverse transform mp. */
kyber_invntt(mp);
}
/* Subtract errors (mp) out of v and reduce into mp. */
kyber_rsub_reduce(mp, v);
}

View File

@ -309,6 +309,7 @@ struct Aes {
byte use_aes_hw_crypto;
#ifdef HAVE_AESGCM
byte use_pmull_hw_crypto;
byte use_sha3_hw_crypto;
#endif
#endif /* __aarch64__ && WOLFSSL_ARMASM && !WOLFSSL_ARMASM_NO_HW_CRYPTO */
#ifdef WOLF_CRYPTO_CB
@ -841,6 +842,7 @@ WOLFSSL_API int wc_AesEaxFree(AesEax* eax);
#if defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \
!defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
/* GHASH one block of data.
*
* XOR block into tag and GMULT with H.
@ -848,7 +850,7 @@ WOLFSSL_API int wc_AesEaxFree(AesEax* eax);
* @param [in, out] aes AES GCM object.
* @param [in] block Block of AAD or cipher text.
*/
#define GHASH_ONE_BLOCK(aes, block) \
#define GHASH_ONE_BLOCK_AARCH64(aes, block) \
do { \
xorbuf(AES_TAG(aes), block, WC_AES_BLOCK_SIZE); \
GMULT_AARCH64(AES_TAG(aes), aes->gcm.H); \

View File

@ -292,6 +292,8 @@ int kyber_cmp_avx2(const byte* a, const byte* b, int sz);
#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM)
WOLFSSL_LOCAL void kyber_ntt(sword16* r);
WOLFSSL_LOCAL void kyber_invntt(sword16* r);
WOLFSSL_LOCAL void kyber_ntt_sqrdmlsh(sword16* r);
WOLFSSL_LOCAL void kyber_invntt_sqrdmlsh(sword16* r);
WOLFSSL_LOCAL void kyber_basemul_mont(sword16* r, const sword16* a,
const sword16* b);
WOLFSSL_LOCAL void kyber_basemul_mont_add(sword16* r, const sword16* a,
@ -301,6 +303,7 @@ WOLFSSL_LOCAL void kyber_add3_reduce(sword16* r, const sword16* a,
const sword16* b);
WOLFSSL_LOCAL void kyber_rsub_reduce(sword16* r, const sword16* a);
WOLFSSL_LOCAL void kyber_to_mont(sword16* p);
WOLFSSL_LOCAL void kyber_to_mont_sqrdmlsh(sword16* p);
WOLFSSL_LOCAL void kyber_sha3_blocksx3_neon(word64* state);
WOLFSSL_LOCAL void kyber_shake128_blocksx3_seed_neon(word64* state, byte* seed);
WOLFSSL_LOCAL void kyber_shake256_blocksx3_seed_neon(word64* state, byte* seed);