Add ARMv8 PAA support to base v5.2.1 resulting in v5.2.3

This commit is contained in:
kaleb-himes
2024-01-25 14:07:46 -07:00
parent 844e961ff5
commit 91349b2599
19 changed files with 263774 additions and 63253 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,22 +1,12 @@
/* armv8-sha256.c
*
* Copyright (C) 2006-2021 wolfSSL Inc.
* Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved.
*
* This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* Contact licensing@wolfssl.com with any questions or comments.
*
* wolfSSL is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
* https://www.wolfssl.com
*/
@ -44,7 +34,15 @@
#include <wolfcrypt/src/misc.c>
#endif
#if defined(FREESCALE_MMCAU_SHA)
#ifdef FREESCALE_MMCAU_CLASSIC_SHA
#include "cau_api.h"
#else
#include "fsl_mmcau.h"
#endif
#endif
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
static const ALIGN32 word32 K[64] = {
0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL,
0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L,
@ -60,6 +58,7 @@ static const ALIGN32 word32 K[64] = {
0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
};
#endif
static int InitSha256(wc_Sha256* sha256)
@ -70,6 +69,17 @@ static int InitSha256(wc_Sha256* sha256)
return BAD_FUNC_ARG;
}
#ifdef FREESCALE_MMCAU_SHA
ret = wolfSSL_CryptHwMutexLock();
if (ret == 0) {
#ifdef FREESCALE_MMCAU_CLASSIC_SHA
cau_sha256_initialize_output(sha256->digest);
#else
MMCAU_SHA256_InitializeOutput((uint32_t*)sha256->digest);
#endif
wolfSSL_CryptHwMutexUnLock();
}
#else
sha256->digest[0] = 0x6A09E667L;
sha256->digest[1] = 0xBB67AE85L;
sha256->digest[2] = 0x3C6EF372L;
@ -78,11 +88,16 @@ static int InitSha256(wc_Sha256* sha256)
sha256->digest[5] = 0x9B05688CL;
sha256->digest[6] = 0x1F83D9ABL;
sha256->digest[7] = 0x5BE0CD19L;
#endif
sha256->buffLen = 0;
sha256->loLen = 0;
sha256->hiLen = 0;
#ifdef WOLFSSL_HASH_FLAGS
sha256->flags = 0;
#endif
return ret;
}
@ -94,6 +109,8 @@ static WC_INLINE void AddLength(wc_Sha256* sha256, word32 len)
}
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
#ifdef __aarch64__
/* First block is in sha256->buffer and rest in data. */
@ -322,6 +339,7 @@ static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 le
static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
{
byte* local;
const word32* k;
local = (byte*)sha256->buffer;
AddLength(sha256, sha256->buffLen); /* before adding pads */
@ -333,6 +351,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_BLOCK_SIZE - sha256->buffLen);
sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen;
k = K;
__asm__ volatile (
"LD1 {v4.2d-v7.2d}, %[buffer] \n"
"MOV v0.16b, v4.16b \n"
@ -474,8 +493,8 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
"ADD v17.4s, v17.4s, v21.4s \n"
"STP q16, q17, %[out] \n"
: [out] "=m" (sha256->digest)
: [k] "r" (K), [digest] "m" (sha256->digest),
: [out] "=m" (sha256->digest), [k] "+r" (k)
: [digest] "m" (sha256->digest),
[buffer] "m" (sha256->buffer)
: "cc", "memory", "v0", "v1", "v2", "v3", "v8", "v9", "v10", "v11"
, "v12", "v13", "v14", "v15", "v16", "v17", "v18"
@ -510,6 +529,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
XMEMCPY(&local[WC_SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen,
sizeof(word32));
k = K;
__asm__ volatile (
"#load in message and schedule updates \n"
"LD1 {v4.2d-v7.2d}, %[buffer] \n"
@ -652,8 +672,8 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
"REV32 v17.16b, v17.16b \n"
#endif
"ST1 {v17.16b}, [%[hashOut]] \n"
: [hashOut] "=r" (hash)
: [k] "r" (K), [digest] "m" (sha256->digest),
: [hashOut] "=r" (hash), [k] "+r" (k)
: [digest] "m" (sha256->digest),
[buffer] "m" (sha256->buffer),
"0" (hash)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
@ -1306,6 +1326,157 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
#endif /* __aarch64__ */
#else /* WOLFSSL_ARMASM_NO_HW_CRYPTO */
#if defined(FREESCALE_MMCAU_SHA)
#ifndef WC_HASH_DATA_ALIGNMENT
/* these hardware API's require 4 byte (word32) alignment */
#define WC_HASH_DATA_ALIGNMENT 4
#endif
/* Hash complete 64-byte blocks with the Freescale MMCAU accelerator.
 *
 * sha256  SHA-256 state; digest is updated in place.
 * data    input message blocks.
 * len     byte count to process; callers pass a multiple of
 *         WC_SHA256_BLOCK_SIZE.
 * returns 0 on success, otherwise the hardware-mutex lock error.
 */
static int Transform_Sha256_Len(wc_Sha256* sha256, const byte* data,
    word32 len)
{
    int ret = wolfSSL_CryptHwMutexLock();

    if (ret != 0)
        return ret;

#if defined(WC_HASH_DATA_ALIGNMENT) && WC_HASH_DATA_ALIGNMENT > 0
    if (((wc_ptr_t)data % WC_HASH_DATA_ALIGNMENT) != 0) {
        /* Input is not word aligned: stage each block through the
         * aligned internal buffer and hash one block at a time. */
        byte* staging = (byte*)sha256->buffer;
        word32 remaining;
        for (remaining = len; remaining >= WC_SHA256_BLOCK_SIZE;
                remaining -= WC_SHA256_BLOCK_SIZE) {
            XMEMCPY(staging, data, WC_SHA256_BLOCK_SIZE);
#ifdef FREESCALE_MMCAU_CLASSIC_SHA
            cau_sha256_hash_n(staging, 1, sha256->digest);
#else
            MMCAU_SHA256_HashN(staging, 1, (uint32_t*)sha256->digest);
#endif
            data += WC_SHA256_BLOCK_SIZE;
        }
    }
    else
#endif
    {
        /* Aligned input: hand every block to the hardware in one call. */
#ifdef FREESCALE_MMCAU_CLASSIC_SHA
        cau_sha256_hash_n((byte*)data, len/WC_SHA256_BLOCK_SIZE,
            sha256->digest);
#else
        MMCAU_SHA256_HashN((byte*)data, len/WC_SHA256_BLOCK_SIZE,
            (uint32_t*)sha256->digest);
#endif
    }
    wolfSSL_CryptHwMutexUnLock();

    return ret;
}
#else /* */
extern void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data,
word32 len);
#endif
/* ARMv8 hardware acceleration Aarch32 and Thumb2 */
/* Feed message bytes into the SHA-256 state.
 *
 * Buffers partial blocks internally; full blocks are handed to
 * Transform_Sha256_Len(). Returns 0 on success, BUFFER_E if the
 * internal buffer length is corrupt.
 */
static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
{
    byte* buf = (byte*)sha256->buffer;
    word32 fullLen;

    /* check that internal buffLen is valid */
    if (sha256->buffLen >= WC_SHA256_BLOCK_SIZE)
        return BUFFER_E;

    AddLength(sha256, len);

    /* Top up a previously buffered partial block first. */
    if (sha256->buffLen > 0) {
        word32 fill = WC_SHA256_BLOCK_SIZE - sha256->buffLen;
        if (fill > len)
            fill = len;
        if (fill > 0) {
            XMEMCPY(&buf[sha256->buffLen], data, fill);
            sha256->buffLen += fill;
            data += fill;
            len -= fill;
        }
        if (sha256->buffLen == WC_SHA256_BLOCK_SIZE) {
            Transform_Sha256_Len(sha256, (const byte*)sha256->buffer,
                WC_SHA256_BLOCK_SIZE);
            sha256->buffLen = 0;
        }
    }

    /* Hash the run of complete blocks directly from the caller's data. */
    fullLen = len & ~(WC_SHA256_BLOCK_SIZE-1);
    if (fullLen > 0) {
        /* Byte reversal performed in function if required. */
        Transform_Sha256_Len(sha256, data, fullLen);
        data += fullLen;
        len -= fullLen;
    }

    /* Stash any tail bytes for the next call or finalization. */
    if (len > 0) {
        XMEMCPY(buf, data, len);
        sha256->buffLen = len;
    }
    return 0;
}
/* Finalize a SHA-256 hash: append padding and the 64-bit message bit
 * length, process the final block(s), and write the digest to hash.
 *
 * sha256  SHA-256 state (message length already accumulated by update).
 * hash    output buffer of at least WC_SHA256_DIGEST_SIZE bytes.
 * returns 0 on success, BAD_FUNC_ARG on NULL argument.
 */
static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
{
    byte* local;

    /* Validate arguments before any dereference; the previous code read
     * sha256->buffer before the NULL check, defeating the check. */
    if (sha256 == NULL || hash == NULL) {
        return BAD_FUNC_ARG;
    }
    local = (byte*)sha256->buffer;

    local[sha256->buffLen++] = 0x80; /* append the mandatory 1 bit */

    /* If there is no room left for the 64-bit length, zero-pad this
     * block, process it, and start a fresh block for the length. */
    if (sha256->buffLen > WC_SHA256_PAD_SIZE) {
        XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_BLOCK_SIZE -
            sha256->buffLen);
        sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen;
        Transform_Sha256_Len(sha256, (const byte*)sha256->buffer,
            WC_SHA256_BLOCK_SIZE);
        sha256->buffLen = 0;
    }
    XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_PAD_SIZE - sha256->buffLen);

    /* put lengths in bits */
    sha256->hiLen = (sha256->loLen >> (8 * sizeof(sha256->loLen) - 3)) +
        (sha256->hiLen << 3);
    sha256->loLen = sha256->loLen << 3;

    /* store lengths */
    /* ! length ordering dependent on digest endian type ! */
    sha256->buffer[WC_SHA256_BLOCK_SIZE / sizeof(word32) - 2] = sha256->hiLen;
    sha256->buffer[WC_SHA256_BLOCK_SIZE / sizeof(word32) - 1] = sha256->loLen;
    ByteReverseWords(
        &(sha256->buffer[WC_SHA256_BLOCK_SIZE / sizeof(word32) - 2]),
        &(sha256->buffer[WC_SHA256_BLOCK_SIZE / sizeof(word32) - 2]),
        WC_SHA256_BLOCK_SIZE - WC_SHA256_PAD_SIZE);
    Transform_Sha256_Len(sha256, (const byte*)sha256->buffer,
        WC_SHA256_BLOCK_SIZE);

#ifdef LITTLE_ENDIAN_ORDER
    /* Digest words are kept in native order; emit big-endian bytes. */
    ByteReverseWords((word32*)hash, sha256->digest, WC_SHA256_DIGEST_SIZE);
#else
    XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE);
#endif

    return 0;
}
#endif /* !WOLFSSL_ARMASM_NO_HW_CRYPTO */
#ifndef NO_SHA256
@ -1315,6 +1486,9 @@ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
return BAD_FUNC_ARG;
sha256->heap = heap;
#ifdef WOLF_CRYPTO_CB
sha256->devId = devId;
#endif
(void)devId;
return InitSha256(sha256);
@ -1430,7 +1604,11 @@ int wc_Sha256Transform(wc_Sha256* sha256, const unsigned char* data)
#else
XMEMCPY(sha256->buffer, data, WC_SHA256_BLOCK_SIZE);
#endif
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
Sha256Transform(sha256, data, 1);
#else
Transform_Sha256_Len(sha256, data, WC_SHA256_BLOCK_SIZE);
#endif
return 0;
}
#endif
@ -1461,6 +1639,9 @@ int wc_Sha256Transform(wc_Sha256* sha256, const unsigned char* data)
sha224->loLen = 0;
sha224->hiLen = 0;
#ifdef WOLFSSL_HASH_FLAGS
sha224->flags = 0;
#endif
return ret;
}

View File

@ -0,0 +1,207 @@
/* armv8-sha3-asm
*
* Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved.
*
* This file is part of wolfSSL.
*
* Contact licensing@wolfssl.com with any questions or comments.
*
* https://www.wolfssl.com
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
/* Generated using (from wolfssl):
* cd ../scripts
* ruby ./sha3/sha3.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha3-asm.S
*/
#ifdef WOLFSSL_ARMASM
#ifdef __aarch64__
#ifndef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_SHA3
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
#ifndef __APPLE__
.text
.type L_SHA3_transform_crypto_r, %object
.section .rodata
.size L_SHA3_transform_crypto_r, 192
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 3
#else
.p2align 3
#endif /* __APPLE__ */
# The 24 Keccak-f[1600] round constants RC[0..23] (FIPS 202), consumed
# one per round via ld1r below.
L_SHA3_transform_crypto_r:
.xword 0x1
.xword 0x8082
.xword 0x800000000000808a
.xword 0x8000000080008000
.xword 0x808b
.xword 0x80000001
.xword 0x8000000080008081
.xword 0x8000000000008009
.xword 0x8a
.xword 0x88
.xword 0x80008009
.xword 0x8000000a
.xword 0x8000808b
.xword 0x800000000000008b
.xword 0x8000000000008089
.xword 0x8000000000008003
.xword 0x8000000000008002
.xword 0x8000000000000080
.xword 0x800a
.xword 0x800000008000000a
.xword 0x8000000080008081
.xword 0x8000000000008080
.xword 0x80000001
.xword 0x8000000080008008
#ifndef __APPLE__
.text
.globl BlockSha3
.type BlockSha3,@function
.align 2
BlockSha3:
#else
.section __TEXT,__text
.globl _BlockSha3
.p2align 2
_BlockSha3:
#endif /* __APPLE__ */
# BlockSha3(state): x0 = pointer to the 25 64-bit Keccak lanes,
# permuted in place using the ARMv8.2 SHA-3 instructions.
# Prologue: save frame and callee-saved SIMD registers d8-d15.
stp x29, x30, [sp, #-80]!
add x29, sp, #0
stp d8, d9, [x29, #16]
stp d10, d11, [x29, #32]
stp d12, d13, [x29, #48]
stp d14, d15, [x29, #64]
#ifdef __APPLE__
.arch_extension sha3
#endif /* __APPLE__ */
# x1 -> round-constant table.
# NOTE(review): the __APPLE__ branch mixes ELF ":lo12:" with Mach-O
# "@PAGEOFF" -- confirm Apple's assembler accepts this; the inline-asm
# variant of this file uses "@PAGEOFF" alone.
#ifndef __APPLE__
adrp x1, L_SHA3_transform_crypto_r
add x1, x1, :lo12:L_SHA3_transform_crypto_r
#else
adrp x1, L_SHA3_transform_crypto_r@PAGE
add x1, x1, :lo12:L_SHA3_transform_crypto_r@PAGEOFF
#endif /* __APPLE__ */
# Load the 25 state lanes into element 0 of v0-v24 (x0 advances 192
# bytes and is rewound below).
ld4 {v0.d, v1.d, v2.d, v3.d}[0], [x0], #32
ld4 {v4.d, v5.d, v6.d, v7.d}[0], [x0], #32
ld4 {v8.d, v9.d, v10.d, v11.d}[0], [x0], #32
ld4 {v12.d, v13.d, v14.d, v15.d}[0], [x0], #32
ld4 {v16.d, v17.d, v18.d, v19.d}[0], [x0], #32
ld4 {v20.d, v21.d, v22.d, v23.d}[0], [x0], #32
ld1 {v24.1d}, [x0]
sub x0, x0, #0xc0
# x2 = round counter.
mov x2, #24
# Start of 24 rounds
L_sha3_crypto_begin:
# Col Mix
# (theta: column parities via eor3, then rax1 rotate-and-xor)
eor3 v31.16b, v0.16b, v5.16b, v10.16b
eor3 v27.16b, v1.16b, v6.16b, v11.16b
eor3 v28.16b, v2.16b, v7.16b, v12.16b
eor3 v29.16b, v3.16b, v8.16b, v13.16b
eor3 v30.16b, v4.16b, v9.16b, v14.16b
eor3 v31.16b, v31.16b, v15.16b, v20.16b
eor3 v27.16b, v27.16b, v16.16b, v21.16b
eor3 v28.16b, v28.16b, v17.16b, v22.16b
eor3 v29.16b, v29.16b, v18.16b, v23.16b
eor3 v30.16b, v30.16b, v19.16b, v24.16b
rax1 v25.2d, v30.2d, v27.2d
rax1 v26.2d, v31.2d, v28.2d
rax1 v27.2d, v27.2d, v29.2d
rax1 v28.2d, v28.2d, v30.2d
rax1 v29.2d, v29.2d, v31.2d
eor v0.16b, v0.16b, v25.16b
# rho/pi: xar rotates RIGHT, so the immediates correspond to 64 minus
# the Keccak left-rotation offsets while the lanes are permuted.
xar v30.2d, v1.2d, v26.2d, #63
xar v1.2d, v6.2d, v26.2d, #20
xar v6.2d, v9.2d, v29.2d, #44
xar v9.2d, v22.2d, v27.2d, #3
xar v22.2d, v14.2d, v29.2d, #25
xar v14.2d, v20.2d, v25.2d, #46
xar v20.2d, v2.2d, v27.2d, #2
xar v2.2d, v12.2d, v27.2d, #21
xar v12.2d, v13.2d, v28.2d, #39
xar v13.2d, v19.2d, v29.2d, #56
xar v19.2d, v23.2d, v28.2d, #8
xar v23.2d, v15.2d, v25.2d, #23
xar v15.2d, v4.2d, v29.2d, #37
xar v4.2d, v24.2d, v29.2d, #50
xar v24.2d, v21.2d, v26.2d, #62
xar v21.2d, v8.2d, v28.2d, #9
xar v8.2d, v16.2d, v26.2d, #19
xar v16.2d, v5.2d, v25.2d, #28
xar v5.2d, v3.2d, v28.2d, #36
xar v3.2d, v18.2d, v28.2d, #43
xar v18.2d, v17.2d, v27.2d, #49
xar v17.2d, v11.2d, v26.2d, #54
xar v11.2d, v7.2d, v27.2d, #58
xar v7.2d, v10.2d, v25.2d, #61
# Row Mix
# (chi: bcax computes a ^ (c & ~b) per 5-lane row)
mov v25.16b, v0.16b
mov v26.16b, v1.16b
bcax v0.16b, v25.16b, v2.16b, v26.16b
bcax v1.16b, v26.16b, v3.16b, v2.16b
bcax v2.16b, v2.16b, v4.16b, v3.16b
bcax v3.16b, v3.16b, v25.16b, v4.16b
bcax v4.16b, v4.16b, v26.16b, v25.16b
mov v25.16b, v5.16b
mov v26.16b, v6.16b
bcax v5.16b, v25.16b, v7.16b, v26.16b
bcax v6.16b, v26.16b, v8.16b, v7.16b
bcax v7.16b, v7.16b, v9.16b, v8.16b
bcax v8.16b, v8.16b, v25.16b, v9.16b
bcax v9.16b, v9.16b, v26.16b, v25.16b
mov v26.16b, v11.16b
bcax v10.16b, v30.16b, v12.16b, v26.16b
bcax v11.16b, v26.16b, v13.16b, v12.16b
bcax v12.16b, v12.16b, v14.16b, v13.16b
bcax v13.16b, v13.16b, v30.16b, v14.16b
bcax v14.16b, v14.16b, v26.16b, v30.16b
mov v25.16b, v15.16b
mov v26.16b, v16.16b
bcax v15.16b, v25.16b, v17.16b, v26.16b
bcax v16.16b, v26.16b, v18.16b, v17.16b
bcax v17.16b, v17.16b, v19.16b, v18.16b
bcax v18.16b, v18.16b, v25.16b, v19.16b
bcax v19.16b, v19.16b, v26.16b, v25.16b
mov v25.16b, v20.16b
mov v26.16b, v21.16b
bcax v20.16b, v25.16b, v22.16b, v26.16b
bcax v21.16b, v26.16b, v23.16b, v22.16b
bcax v22.16b, v22.16b, v24.16b, v23.16b
bcax v23.16b, v23.16b, v25.16b, v24.16b
bcax v24.16b, v24.16b, v26.16b, v25.16b
# iota: XOR this round's constant into lane (0,0), then loop.
ld1r {v30.2d}, [x1], #8
subs x2, x2, #1
eor v0.16b, v0.16b, v30.16b
bne L_sha3_crypto_begin
# Store the permuted state back to memory.
st4 {v0.d, v1.d, v2.d, v3.d}[0], [x0], #32
st4 {v4.d, v5.d, v6.d, v7.d}[0], [x0], #32
st4 {v8.d, v9.d, v10.d, v11.d}[0], [x0], #32
st4 {v12.d, v13.d, v14.d, v15.d}[0], [x0], #32
st4 {v16.d, v17.d, v18.d, v19.d}[0], [x0], #32
st4 {v20.d, v21.d, v22.d, v23.d}[0], [x0], #32
st1 {v24.1d}, [x0]
# Epilogue: restore callee-saved SIMD registers and the frame.
ldp d8, d9, [x29, #16]
ldp d10, d11, [x29, #32]
ldp d12, d13, [x29, #48]
ldp d14, d15, [x29, #64]
ldp x29, x30, [sp], #0x50
ret
#ifndef __APPLE__
.size BlockSha3,.-BlockSha3
#endif /* __APPLE__ */
#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */
#endif /* WOLFSSL_SHA3 */
#endif /* __aarch64__ */
#endif /* WOLFSSL_ARMASM */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
#endif /* !WOLFSSL_ARMASM_INLINE */

View File

@ -0,0 +1,178 @@
/* armv8-sha3-asm
*
* Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved.
*
* This file is part of wolfSSL.
*
* Contact licensing@wolfssl.com with any questions or comments.
*
* https://www.wolfssl.com
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
/* Generated using (from wolfssl):
* cd ../scripts
* ruby ./sha3/sha3.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha3-asm.c
*/
#ifdef WOLFSSL_ARMASM
#ifdef __aarch64__
#ifdef WOLFSSL_ARMASM_INLINE
#include <wolfssl/wolfcrypt/sha3.h>
#ifdef WOLFSSL_SHA3
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
/* The 24 Keccak-f[1600] round constants RC[0..23] (FIPS 202), applied
 * one per round in the iota step of BlockSha3() below. */
static const uint64_t L_SHA3_transform_crypto_r[] = {
0x1UL,
0x8082UL,
0x800000000000808aUL,
0x8000000080008000UL,
0x808bUL,
0x80000001UL,
0x8000000080008081UL,
0x8000000000008009UL,
0x8aUL,
0x88UL,
0x80008009UL,
0x8000000aUL,
0x8000808bUL,
0x800000000000008bUL,
0x8000000000008089UL,
0x8000000000008003UL,
0x8000000000008002UL,
0x8000000000000080UL,
0x800aUL,
0x800000008000000aUL,
0x8000000080008081UL,
0x8000000000008080UL,
0x80000001UL,
0x8000000080008008UL,
};
/* Apply the full 24-round Keccak-f[1600] permutation, in place, to the
 * 25-lane state using the ARMv8.2 SHA-3 instructions (EOR3/RAX1/XAR/BCAX).
 *
 * state  pointer to 25 64-bit lanes; permuted in place.
 */
void BlockSha3(unsigned long* state)
{
__asm__ __volatile__ (
#ifdef __APPLE__
".arch_extension sha3\n\t"
#endif /* __APPLE__ */
/* x1 walks the round-constant table; x2 counts rounds down from 24. */
#ifndef __APPLE__
"adrp x1, %[L_SHA3_transform_crypto_r]\n\t"
"add x1, x1, :lo12:%[L_SHA3_transform_crypto_r]\n\t"
#else
"adrp x1, %[L_SHA3_transform_crypto_r]@PAGE\n\t"
"add x1, x1, %[L_SHA3_transform_crypto_r]@PAGEOFF\n\t"
#endif /* __APPLE__ */
/* Load the 25 state lanes into element 0 of v0-v24. */
"ld4 {v0.d, v1.d, v2.d, v3.d}[0], [%x[state]], #32\n\t"
"ld4 {v4.d, v5.d, v6.d, v7.d}[0], [%x[state]], #32\n\t"
"ld4 {v8.d, v9.d, v10.d, v11.d}[0], [%x[state]], #32\n\t"
"ld4 {v12.d, v13.d, v14.d, v15.d}[0], [%x[state]], #32\n\t"
"ld4 {v16.d, v17.d, v18.d, v19.d}[0], [%x[state]], #32\n\t"
"ld4 {v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t"
"ld1 {v24.1d}, [%x[state]]\n\t"
"sub %x[state], %x[state], #0xc0\n\t"
"mov x2, #24\n\t"
/* Start of 24 rounds */
"\n"
"L_sha3_crypto_begin_%=: \n\t"
/* Col Mix */
"eor3 v31.16b, v0.16b, v5.16b, v10.16b\n\t"
"eor3 v27.16b, v1.16b, v6.16b, v11.16b\n\t"
"eor3 v28.16b, v2.16b, v7.16b, v12.16b\n\t"
"eor3 v29.16b, v3.16b, v8.16b, v13.16b\n\t"
"eor3 v30.16b, v4.16b, v9.16b, v14.16b\n\t"
"eor3 v31.16b, v31.16b, v15.16b, v20.16b\n\t"
"eor3 v27.16b, v27.16b, v16.16b, v21.16b\n\t"
"eor3 v28.16b, v28.16b, v17.16b, v22.16b\n\t"
"eor3 v29.16b, v29.16b, v18.16b, v23.16b\n\t"
"eor3 v30.16b, v30.16b, v19.16b, v24.16b\n\t"
"rax1 v25.2d, v30.2d, v27.2d\n\t"
"rax1 v26.2d, v31.2d, v28.2d\n\t"
"rax1 v27.2d, v27.2d, v29.2d\n\t"
"rax1 v28.2d, v28.2d, v30.2d\n\t"
"rax1 v29.2d, v29.2d, v31.2d\n\t"
"eor v0.16b, v0.16b, v25.16b\n\t"
/* rho/pi: xar rotates right while permuting the lanes. */
"xar v30.2d, v1.2d, v26.2d, #63\n\t"
"xar v1.2d, v6.2d, v26.2d, #20\n\t"
"xar v6.2d, v9.2d, v29.2d, #44\n\t"
"xar v9.2d, v22.2d, v27.2d, #3\n\t"
"xar v22.2d, v14.2d, v29.2d, #25\n\t"
"xar v14.2d, v20.2d, v25.2d, #46\n\t"
"xar v20.2d, v2.2d, v27.2d, #2\n\t"
"xar v2.2d, v12.2d, v27.2d, #21\n\t"
"xar v12.2d, v13.2d, v28.2d, #39\n\t"
"xar v13.2d, v19.2d, v29.2d, #56\n\t"
"xar v19.2d, v23.2d, v28.2d, #8\n\t"
"xar v23.2d, v15.2d, v25.2d, #23\n\t"
"xar v15.2d, v4.2d, v29.2d, #37\n\t"
"xar v4.2d, v24.2d, v29.2d, #50\n\t"
"xar v24.2d, v21.2d, v26.2d, #62\n\t"
"xar v21.2d, v8.2d, v28.2d, #9\n\t"
"xar v8.2d, v16.2d, v26.2d, #19\n\t"
"xar v16.2d, v5.2d, v25.2d, #28\n\t"
"xar v5.2d, v3.2d, v28.2d, #36\n\t"
"xar v3.2d, v18.2d, v28.2d, #43\n\t"
"xar v18.2d, v17.2d, v27.2d, #49\n\t"
"xar v17.2d, v11.2d, v26.2d, #54\n\t"
"xar v11.2d, v7.2d, v27.2d, #58\n\t"
"xar v7.2d, v10.2d, v25.2d, #61\n\t"
/* Row Mix */
"mov v25.16b, v0.16b\n\t"
"mov v26.16b, v1.16b\n\t"
"bcax v0.16b, v25.16b, v2.16b, v26.16b\n\t"
"bcax v1.16b, v26.16b, v3.16b, v2.16b\n\t"
"bcax v2.16b, v2.16b, v4.16b, v3.16b\n\t"
"bcax v3.16b, v3.16b, v25.16b, v4.16b\n\t"
"bcax v4.16b, v4.16b, v26.16b, v25.16b\n\t"
"mov v25.16b, v5.16b\n\t"
"mov v26.16b, v6.16b\n\t"
"bcax v5.16b, v25.16b, v7.16b, v26.16b\n\t"
"bcax v6.16b, v26.16b, v8.16b, v7.16b\n\t"
"bcax v7.16b, v7.16b, v9.16b, v8.16b\n\t"
"bcax v8.16b, v8.16b, v25.16b, v9.16b\n\t"
"bcax v9.16b, v9.16b, v26.16b, v25.16b\n\t"
"mov v26.16b, v11.16b\n\t"
"bcax v10.16b, v30.16b, v12.16b, v26.16b\n\t"
"bcax v11.16b, v26.16b, v13.16b, v12.16b\n\t"
"bcax v12.16b, v12.16b, v14.16b, v13.16b\n\t"
"bcax v13.16b, v13.16b, v30.16b, v14.16b\n\t"
"bcax v14.16b, v14.16b, v26.16b, v30.16b\n\t"
"mov v25.16b, v15.16b\n\t"
"mov v26.16b, v16.16b\n\t"
"bcax v15.16b, v25.16b, v17.16b, v26.16b\n\t"
"bcax v16.16b, v26.16b, v18.16b, v17.16b\n\t"
"bcax v17.16b, v17.16b, v19.16b, v18.16b\n\t"
"bcax v18.16b, v18.16b, v25.16b, v19.16b\n\t"
"bcax v19.16b, v19.16b, v26.16b, v25.16b\n\t"
"mov v25.16b, v20.16b\n\t"
"mov v26.16b, v21.16b\n\t"
"bcax v20.16b, v25.16b, v22.16b, v26.16b\n\t"
"bcax v21.16b, v26.16b, v23.16b, v22.16b\n\t"
"bcax v22.16b, v22.16b, v24.16b, v23.16b\n\t"
"bcax v23.16b, v23.16b, v25.16b, v24.16b\n\t"
"bcax v24.16b, v24.16b, v26.16b, v25.16b\n\t"
/* iota: XOR this round's constant into lane (0,0), then loop. */
"ld1r {v30.2d}, [x1], #8\n\t"
"subs x2, x2, #1\n\t"
"eor v0.16b, v0.16b, v30.16b\n\t"
"bne L_sha3_crypto_begin_%=\n\t"
/* Write the permuted state back to memory. */
"st4 {v0.d, v1.d, v2.d, v3.d}[0], [%x[state]], #32\n\t"
"st4 {v4.d, v5.d, v6.d, v7.d}[0], [%x[state]], #32\n\t"
"st4 {v8.d, v9.d, v10.d, v11.d}[0], [%x[state]], #32\n\t"
"st4 {v12.d, v13.d, v14.d, v15.d}[0], [%x[state]], #32\n\t"
"st4 {v16.d, v17.d, v18.d, v19.d}[0], [%x[state]], #32\n\t"
"st4 {v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t"
"st1 {v24.1d}, [%x[state]]\n\t"
: [state] "+r" (state)
: [L_SHA3_transform_crypto_r] "S" (L_SHA3_transform_crypto_r)
: "memory", "x1", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "cc"
);
}
#endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */
#endif /* WOLFSSL_SHA3 */
#endif /* __aarch64__ */
#endif /* WOLFSSL_ARMASM */
#endif /* WOLFSSL_ARMASM_INLINE */

View File

@ -1,22 +1,12 @@
/* armv8-sha512-asm
*
* Copyright (C) 2006-2021 wolfSSL Inc.
* Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved.
*
* This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* Contact licensing@wolfssl.com with any questions or comments.
*
* wolfSSL is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
* https://www.wolfssl.com
*/
#ifdef HAVE_CONFIG_H
@ -30,6 +20,7 @@
*/
#ifdef WOLFSSL_ARMASM
#ifdef __aarch64__
#ifndef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_SHA512
#ifndef WOLFSSL_ARMASM_CRYPTO_SHA512
#ifndef __APPLE__
@ -1201,20 +1192,15 @@ Transform_Sha512_Len_crypto:
.p2align 2
_Transform_Sha512_Len_crypto:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-208]!
stp x29, x30, [sp, #-80]!
add x29, sp, #0
stp d8, d9, [x29, #16]
stp d10, d11, [x29, #32]
stp d12, d13, [x29, #48]
stp d14, d15, [x29, #64]
stp d16, d17, [x29, #80]
stp d18, d19, [x29, #96]
stp d20, d21, [x29, #112]
stp d22, d23, [x29, #128]
stp d24, d25, [x29, #144]
stp d26, d27, [x29, #160]
stp d28, d29, [x29, #176]
stp d30, d31, [x29, #192]
#ifdef __APPLE__
.arch_extension sha3
#endif /* __APPLE__ */
#ifndef __APPLE__
adrp x4, L_SHA512_transform_crypto_len_k
add x4, x4, :lo12:L_SHA512_transform_crypto_len_k
@ -1222,7 +1208,7 @@ _Transform_Sha512_Len_crypto:
adrp x4, L_SHA512_transform_crypto_len_k@PAGE
add x4, x4, :lo12:L_SHA512_transform_crypto_len_k@PAGEOFF
#endif /* __APPLE__ */
# Load first 16 64-bit words of K permantly
# Load first 16 64-bit words of K permanently
ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x4], #0x40
ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [x4], #0x40
# Load digest into working vars
@ -1731,15 +1717,7 @@ L_sha512_len_crypto_begin:
ldp d10, d11, [x29, #32]
ldp d12, d13, [x29, #48]
ldp d14, d15, [x29, #64]
ldp d16, d17, [x29, #80]
ldp d18, d19, [x29, #96]
ldp d20, d21, [x29, #112]
ldp d22, d23, [x29, #128]
ldp d24, d25, [x29, #144]
ldp d26, d27, [x29, #160]
ldp d28, d29, [x29, #176]
ldp d30, d31, [x29, #192]
ldp x29, x30, [sp], #0xd0
ldp x29, x30, [sp], #0x50
ret
#ifndef __APPLE__
.size Transform_Sha512_Len_crypto,.-Transform_Sha512_Len_crypto
@ -1752,3 +1730,4 @@ L_sha512_len_crypto_begin:
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
#endif /* !WOLFSSL_ARMASM_INLINE */

File diff suppressed because it is too large Load Diff

View File

@ -1,22 +1,12 @@
/* sha512.c
*
* Copyright (C) 2006-2021 wolfSSL Inc.
* Copyright (C) 2006-2023 wolfSSL Inc. All rights reserved.
*
* This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* Contact licensing@wolfssl.com with any questions or comments.
*
* wolfSSL is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
* https://www.wolfssl.com
*/
#ifdef HAVE_CONFIG_H
@ -146,18 +136,6 @@ static int InitSha512_256(wc_Sha512* sha512)
#ifdef WOLFSSL_SHA512
#ifdef WOLFSSL_ARMASM
#ifndef WOLFSSL_ARMASM_CRYPTO_SHA512
extern void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data,
word32 len);
#define Transform_Sha512_Len Transform_Sha512_Len_neon
#else
extern void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data,
word32 len);
#define Transform_Sha512_Len Transform_Sha512_Len_crypto
#endif
#endif
static int InitSha512_Family(wc_Sha512* sha512, void* heap, int devId,
enum wc_HashType type)
{
@ -467,7 +445,25 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 le
blocksLen = len & ~(WC_SHA512_BLOCK_SIZE-1);
if (blocksLen > 0) {
/* Byte reversal performed in function if required. */
Transform_Sha512_Len(sha512, data, blocksLen);
#ifndef WOLFSSL_ARMASM_NO_NEON
/* Data must be 64-bit aligned to be passed to Transform_Sha512_Len().
* 64 bits is 8 bytes.
*/
if (((size_t)data & 0x7) != 0) {
word32 i;
for (i = 0; i < blocksLen; i += WC_SHA512_BLOCK_SIZE) {
word64 buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64)];
XMEMCPY(buffer, data + i, WC_SHA512_BLOCK_SIZE);
Transform_Sha512_Len(sha512, (const byte*)buffer,
WC_SHA512_BLOCK_SIZE);
}
}
else
#endif
{
Transform_Sha512_Len(sha512, data, blocksLen);
}
data += blocksLen;
len -= blocksLen;
}
@ -792,6 +788,8 @@ void wc_Sha384Free(wc_Sha384* sha384)
#ifdef WOLFSSL_SHA512
#if !defined(WOLFSSL_NOSHA512_224) || !defined(WOLFSSL_NOSHA512_256)
static int Sha512_Family_GetHash(wc_Sha512* sha512, byte* hash,
enum wc_HashType type )
{
@ -828,6 +826,8 @@ static int Sha512_Family_GetHash(wc_Sha512* sha512, byte* hash,
return ret;
}
#endif /* !WOLFSSL_NOSHA512_224 || !WOLFSSL_NOSHA512_256 */
int wc_Sha512GetHash(wc_Sha512* sha512, byte* hash)
{
int ret;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -38,6 +38,27 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
#ifndef NO_AES
#ifdef HAVE_AESGCM
/* Per-key GHASH/GCM state, grouped so it can be embedded in struct Aes. */
typedef struct Gcm {
ALIGN16 byte H[16]; /* GHASH hash subkey; 16-byte aligned for SIMD use */
#ifdef OPENSSL_EXTRA
word32 aadH[4]; /* additional authenticated data GHASH */
word32 aadLen; /* additional authenticated data len */
#endif
#ifdef GCM_TABLE
/* key-based fast multiplication table. */
ALIGN16 byte M0[256][16];
#elif defined(GCM_TABLE_4BIT)
/* 4-bit table variant; layout differs per platform word/endian mode */
#if defined(BIG_ENDIAN_ORDER) || defined(WC_16BIT_CPU)
ALIGN16 byte M0[16][16];
#else
ALIGN16 byte M0[32][16];
#endif
#endif /* GCM_TABLE */
} Gcm;
#endif
#if defined(HAVE_FIPS) && \
defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
#include <wolfssl/wolfcrypt/fips.h>
@ -177,6 +198,7 @@ struct Aes {
#endif
#ifdef HAVE_AESGCM
ALIGN16 byte H[AES_BLOCK_SIZE];
Gcm gcm;
#ifdef OPENSSL_EXTRA
word32 aadH[4]; /* additional authenticated data GHASH */
word32 aadLen; /* additional authenticated data len */
@ -377,8 +399,13 @@ WOLFSSL_API int wc_AesEcbDecrypt(Aes* aes, byte* out,
WOLFSSL_API __must_check int wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in);
WOLFSSL_API __must_check int wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in);
#else
WOLFSSL_API void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in);
WOLFSSL_API void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in);
#ifndef WOLFSSL_ARMASM
WOLFSSL_API void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in);
WOLFSSL_API void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in);
#else
WOLFSSL_API int wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in);
WOLFSSL_API int wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in);
#endif
#endif
WOLFSSL_API int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len,
const byte* iv, int dir);
@ -449,8 +476,10 @@ WOLFSSL_API int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag,
const byte* authIn, word32 authInSz,
const byte* authTag, word32 authTagSz);
#endif /* WC_NO_RNG */
WOLFSSL_LOCAL void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
#ifndef WOLFSSL_ARMASM
WOLFSSL_LOCAL void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
word32 cSz, byte* s, word32 sSz);
#endif
#endif /* HAVE_AESGCM */
#ifdef HAVE_AESCCM
WOLFSSL_LOCAL int wc_AesCcmCheckTagSize(int sz);

View File

@ -204,6 +204,22 @@ struct wc_Sha512 {
#ifdef WOLFSSL_SHA512
#ifdef WOLFSSL_ARMASM
#ifdef __aarch64__
#ifndef WOLFSSL_ARMASM_CRYPTO_SHA512
void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data,
word32 len);
#define Transform_Sha512_Len Transform_Sha512_Len_neon
#else
void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data,
word32 len);
#define Transform_Sha512_Len Transform_Sha512_Len_crypto
#endif
#else
extern void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data,
word32 len);
#endif
#endif
WOLFSSL_API int wc_InitSha512(wc_Sha512*);
WOLFSSL_API int wc_InitSha512_ex(wc_Sha512*, void*, int);