AES ARM32 and Thumb2 ASM: fixup ARM32 and add Thumb2

Fix which functions and data are compiled in depending on defines.
Better handling of constants.
Also fix AArch64 ed25519 inline assembly.
This commit is contained in:
Sean Parkinson
2023-08-25 13:40:15 +10:00
parent 70c362f680
commit 0638ec234b
22 changed files with 6511 additions and 652 deletions

View File

@ -2268,7 +2268,7 @@ AC_ARG_ENABLE([aescbc],
if test "$ENABLED_AESCBC" = "no"
then
AM_CFLAGS="$AM_CFLAGS -DNO_AES_CBC"
AM_CCASFLAGS="$AM_CCASFLAGS -DHAVE_AES_CBC"
AM_CCASFLAGS="$AM_CCASFLAGS -DNO_AES_CBC"
fi
# AES-CBC length checks (checks that input lengths are multiples of block size)

View File

@ -157,8 +157,8 @@ endif
if BUILD_AES
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes.c
if BUILD_ARMASM_NEON
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-aes.c
if BUILD_ARMASM_NEON
if !BUILD_ARMASM_CRYPTO
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
@ -166,7 +166,15 @@ else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm.S
endif !BUILD_ARMASM_INLINE
endif !BUILD_ARMASM_CRYPTO
endif BUILD_ARMASM_NEON
else
if BUILD_ARMASM
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S
endif !BUILD_ARMASM_INLINE
endif BUILD_ARMASM
endif !BUILD_ARMASM_NEON
endif BUILD_AES
if BUILD_AESNI
@ -401,16 +409,26 @@ endif
if !BUILD_FIPS_CURRENT
if BUILD_AES
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes.c
if BUILD_ARMASM_NEON
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-aes.c
if BUILD_ARMASM_NEON
if !BUILD_ARMASM_CRYPTO
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S
endif !BUILD_ARMASM_INLINE
endif !BUILD_ARMASM_CRYPTO
endif BUILD_ARMASM_NEON
else
if BUILD_ARMASM
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S
endif !BUILD_ARMASM_INLINE
endif BUILD_ARMASM
endif !BUILD_ARMASM_NEON
if BUILD_AFALG
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/af_alg/afalg_aes.c
endif BUILD_AFALG

View File

@ -97,7 +97,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
#include <wolfcrypt/src/misc.c>
#endif
#if !defined(WOLFSSL_ARMASM) || defined(WOLFSSL_ARMASM_NO_NEON)
#ifndef WOLFSSL_ARMASM
#ifdef WOLFSSL_IMX6_CAAM_BLOB
/* case of possibly not using hardware acceleration for AES but using key
@ -4573,7 +4573,7 @@ int wc_AesSetIV(Aes* aes, const byte* iv)
#endif /* NEED_AES_CTR_SOFT */
#endif /* WOLFSSL_AES_COUNTER */
#endif /* !WOLFSSL_ARMASM || WOLFSSL_ARMASM_NO_NEON */
#endif /* !WOLFSSL_ARMASM */
/*
@ -4620,7 +4620,7 @@ static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz)
#endif
#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
#ifdef WOLFSSL_ARMASM
/* implementation is located in wolfcrypt/src/port/arm/armv8-aes.c */
#elif defined(WOLFSSL_AFALG)
@ -8851,7 +8851,7 @@ int wc_AesCcmCheckTagSize(int sz)
return 0;
}
#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
#ifdef WOLFSSL_ARMASM
/* implementation located in wolfcrypt/src/port/arm/armv8-aes.c */
#elif defined(HAVE_COLDFIRE_SEC)

View File

@ -921,10 +921,14 @@ int ge_compress_key(byte* out, const byte* xIn, const byte* yIn, word32 keySz)
{
ge_p2 g;
byte bArray[ED25519_KEY_SIZE];
byte x[ED25519_KEY_SIZE];
byte y[ED25519_KEY_SIZE];
word32 i;
fe_frombytes(g.X, xIn);
fe_frombytes(g.Y, yIn);
XMEMCPY(x, xIn, ED25519_KEY_SIZE);
XMEMCPY(y, yIn, ED25519_KEY_SIZE);
fe_frombytes(g.X, x);
fe_frombytes(g.Y, y);
fe_1(g.Z);
ge_tobytes(bArray, &g);

View File

@ -33,6 +33,7 @@
#if !defined(__aarch64__) && defined(__arm__)
#ifndef WOLFSSL_ARMASM_INLINE
#ifndef NO_AES
#ifdef HAVE_AES_DECRYPT
.text
.type L_AES_ARM32_td_data, %object
.size L_AES_ARM32_td_data, 1024
@ -294,6 +295,8 @@ L_AES_ARM32_td_data:
.word 0x70d532b6
.word 0x74486c5c
.word 0x42d0b857
#endif /* HAVE_AES_DECRYPT */
#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
.text
.type L_AES_ARM32_te_data, %object
.size L_AES_ARM32_te_data, 1024
@ -555,18 +558,23 @@ L_AES_ARM32_te_data:
.word 0xfca85454
.word 0xd66dbbbb
.word 0x3a2c1616
#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#ifdef HAVE_AES_DECRYPT
.text
.type L_AES_ARM32_td, %object
.size L_AES_ARM32_td, 12
.align 4
L_AES_ARM32_td:
.word L_AES_ARM32_td_data
#endif /* HAVE_AES_DECRYPT */
#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
.text
.type L_AES_ARM32_te, %object
.size L_AES_ARM32_te, 12
.align 4
L_AES_ARM32_te:
.word L_AES_ARM32_te_data
#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#ifdef HAVE_AES_DECRYPT
.text
.align 4
@ -574,8 +582,10 @@ L_AES_ARM32_te:
.type AES_invert_key, %function
AES_invert_key:
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
ldr r12, L_AES_ARM32_te
ldr lr, L_AES_ARM32_td
adr r12, L_AES_ARM32_te
ldr r12, [r12]
adr lr, L_AES_ARM32_td
ldr lr, [lr]
add r10, r0, r1, lsl #4
mov r11, r1
L_AES_invert_key_loop:
@ -681,7 +691,8 @@ L_AES_ARM32_rcon:
.type AES_set_encrypt_key, %function
AES_set_encrypt_key:
push {r4, r5, r6, r7, r8, lr}
ldr r8, L_AES_ARM32_te
adr r8, L_AES_ARM32_te
ldr r8, [r8]
adr lr, L_AES_ARM32_rcon
cmp r1, #0x80
beq L_AES_set_encrypt_key_start_128
@ -911,7 +922,6 @@ L_AES_set_encrypt_key_loop_128:
L_AES_set_encrypt_key_end:
pop {r4, r5, r6, r7, r8, pc}
.size AES_set_encrypt_key,.-AES_set_encrypt_key
#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
.text
.align 4
.globl AES_encrypt_block
@ -1123,12 +1133,14 @@ L_AES_encrypt_block_nr:
eor r7, r7, r11
pop {pc}
.size AES_encrypt_block,.-AES_encrypt_block
#if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
.text
.type L_AES_ARM32_te_ecb, %object
.size L_AES_ARM32_te_ecb, 12
.align 4
L_AES_ARM32_te_ecb:
.word L_AES_ARM32_te_data
#endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
.text
.align 4
@ -1137,7 +1149,8 @@ L_AES_ARM32_te_ecb:
AES_ECB_encrypt:
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
mov lr, r0
ldr r0, L_AES_ARM32_te_ecb
adr r0, L_AES_ARM32_te_ecb
ldr r0, [r0]
ldr r12, [sp, #36]
push {r3}
cmp r12, #10
@ -1259,7 +1272,8 @@ AES_CBC_encrypt:
ldr r8, [sp, #36]
ldr r9, [sp, #40]
mov lr, r0
ldr r0, L_AES_ARM32_te_ecb
adr r0, L_AES_ARM32_te_ecb
ldr r0, [r0]
ldm r9, {r4, r5, r6, r7}
push {r3, r9}
cmp r8, #10
@ -1394,7 +1408,8 @@ AES_CTR_encrypt:
ldr r12, [sp, #36]
ldr r8, [sp, #40]
mov lr, r0
ldr r0, L_AES_ARM32_te_ecb
adr r0, L_AES_ARM32_te_ecb
ldr r0, [r0]
ldm r8, {r4, r5, r6, r7}
rev r4, r4
rev r5, r5
@ -1540,7 +1555,6 @@ L_AES_CTR_encrypt_end:
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.size AES_CTR_encrypt,.-AES_CTR_encrypt
#endif /* WOLFSSL_AES_COUNTER */
#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#ifdef HAVE_AES_DECRYPT
#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC)
.text
@ -2030,7 +2044,8 @@ AES_ECB_decrypt:
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
ldr r8, [sp, #36]
mov lr, r0
ldr r0, L_AES_ARM32_td_ecb
adr r0, L_AES_ARM32_td_ecb
ldr r0, [r0]
adr r12, L_AES_ARM32_td4
cmp r8, #10
beq L_AES_ECB_decrypt_start_block_128
@ -2147,7 +2162,8 @@ AES_CBC_decrypt:
ldr r8, [sp, #36]
ldr r4, [sp, #40]
mov lr, r0
ldr r0, L_AES_ARM32_td_ecb
adr r0, L_AES_ARM32_td_ecb
ldr r0, [r0]
adr r12, L_AES_ARM32_td4
push {r3, r4}
cmp r8, #10
@ -3118,7 +3134,8 @@ AES_GCM_encrypt:
ldr r12, [sp, #36]
ldr r8, [sp, #40]
mov lr, r0
ldr r0, L_AES_ARM32_te_gcm
adr r0, L_AES_ARM32_te_gcm
ldr r0, [r0]
ldm r8, {r4, r5, r6, r7}
rev r4, r4
rev r5, r5

View File

@ -28,6 +28,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
@ -36,10 +37,12 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM_INLINE
#ifndef NO_AES
#include <wolfssl/wolfcrypt/aes.h>
#ifdef HAVE_AES_DECRYPT
static const uint32_t L_AES_ARM32_td_data[] = {
0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
@ -107,6 +110,8 @@ static const uint32_t L_AES_ARM32_td_data[] = {
0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
};
#endif /* HAVE_AES_DECRYPT */
#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
static const uint32_t L_AES_ARM32_te_data[] = {
0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
@ -174,18 +179,25 @@ static const uint32_t L_AES_ARM32_te_data[] = {
0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
};
#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#ifdef HAVE_AES_DECRYPT
static const uint32_t* L_AES_ARM32_td = L_AES_ARM32_td_data;
#endif /* HAVE_AES_DECRYPT */
#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
static const uint32_t* L_AES_ARM32_te = L_AES_ARM32_te_data;
#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#ifdef HAVE_AES_DECRYPT
void AES_invert_key(unsigned char* ks, word32 rounds);
void AES_invert_key(unsigned char* ks_p, word32 rounds_p)
{
register unsigned char* ks asm ("r0") = ks_p;
register word32 rounds asm ("r1") = rounds_p;
register unsigned char* ks asm ("r0") = (unsigned char*)ks_p;
register word32 rounds asm ("r1") = (word32)rounds_p;
register uint32_t* L_AES_ARM32_te_c asm ("r2") = (uint32_t*)L_AES_ARM32_te;
register uint32_t* L_AES_ARM32_td_c asm ("r3") = (uint32_t*)L_AES_ARM32_td;
__asm__ __volatile__ (
"ldr r12, %[L_AES_ARM32_te]\n\t"
"ldr lr, %[L_AES_ARM32_td]\n\t"
"mov r12, %[L_AES_ARM32_te]\n\t"
"mov lr, %[L_AES_ARM32_td]\n\t"
"add r10, %[ks], %[rounds], lsl #4\n\t"
"mov r11, %[rounds]\n\t"
"\n"
@ -269,9 +281,9 @@ void AES_invert_key(unsigned char* ks_p, word32 rounds_p)
"str r8, [%[ks]], #4\n\t"
"subs r11, r11, #1\n\t"
"bne L_AES_invert_key_mix_loop_%=\n\t"
: [ks] "+r" (ks), [rounds] "+r" (rounds)
: [L_AES_ARM32_te] "g" (L_AES_ARM32_te), [L_AES_ARM32_td] "g" (L_AES_ARM32_td)
: "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
: [ks] "+r" (ks), [rounds] "+r" (rounds), [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c), [L_AES_ARM32_td] "+r" (L_AES_ARM32_td_c)
:
: "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
);
}
@ -285,9 +297,11 @@ static const uint32_t L_AES_ARM32_rcon[] = {
void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks);
void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char* ks_p)
{
register const unsigned char* key asm ("r0") = key_p;
register word32 len asm ("r1") = len_p;
register unsigned char* ks asm ("r2") = ks_p;
register const unsigned char* key asm ("r0") = (const unsigned char*)key_p;
register word32 len asm ("r1") = (word32)len_p;
register unsigned char* ks asm ("r2") = (unsigned char*)ks_p;
register uint32_t* L_AES_ARM32_te_c asm ("r3") = (uint32_t*)L_AES_ARM32_te;
register uint32_t* L_AES_ARM32_rcon_c asm ("r4") = (uint32_t*)&L_AES_ARM32_rcon;
__asm__ __volatile__ (
"mov r8, %[L_AES_ARM32_te]\n\t"
@ -524,20 +538,19 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char
"bne L_AES_set_encrypt_key_loop_128_%=\n\t"
"\n"
"L_AES_set_encrypt_key_end_%=: \n\t"
: [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks)
: [L_AES_ARM32_te] "g" (L_AES_ARM32_te), [L_AES_ARM32_rcon] "g" (L_AES_ARM32_rcon)
: "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8"
: [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c), [L_AES_ARM32_rcon] "+r" (L_AES_ARM32_rcon_c)
:
: "memory", "r12", "lr", "r5", "r6", "r7", "r8"
);
}
#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
void AES_encrypt_block(const uint32_t* te, int nr, int len, const uint32_t* ks);
void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, const uint32_t* ks_p)
{
register const uint32_t* te asm ("r0") = te_p;
register int nr asm ("r1") = nr_p;
register int len asm ("r2") = len_p;
register const uint32_t* ks asm ("r3") = ks_p;
register const uint32_t* te asm ("r0") = (const uint32_t*)te_p;
register int nr asm ("r1") = (int)nr_p;
register int len asm ("r2") = (int)len_p;
register const uint32_t* ks asm ("r3") = (const uint32_t*)ks_p;
__asm__ __volatile__ (
"\n"
@ -750,20 +763,23 @@ void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, const uint32_t
);
}
#if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
static const uint32_t* L_AES_ARM32_te_ecb = L_AES_ARM32_te_data;
#endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr);
void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p)
{
register const unsigned char* in asm ("r0") = in_p;
register unsigned char* out asm ("r1") = out_p;
register unsigned long len asm ("r2") = len_p;
register const unsigned char* ks asm ("r3") = ks_p;
register int nr asm ("r4") = nr_p;
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
register unsigned long len asm ("r2") = (unsigned long)len_p;
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
register int nr asm ("r4") = (int)nr_p;
register uint32_t* L_AES_ARM32_te_ecb_c asm ("r5") = (uint32_t*)L_AES_ARM32_te_ecb;
__asm__ __volatile__ (
"mov lr, %[in]\n\t"
"ldr r0, %[L_AES_ARM32_te_ecb]\n\t"
"mov r0, %[L_AES_ARM32_te_ecb]\n\t"
"mov r12, r4\n\t"
"push {%[ks]}\n\t"
"cmp r12, #10\n\t"
@ -878,9 +894,9 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"\n"
"L_AES_ECB_encrypt_end_%=: \n\t"
"pop {%[ks]}\n\t"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr)
: [L_AES_ARM32_te_ecb] "g" (L_AES_ARM32_te_ecb)
: "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c)
:
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
}
@ -890,18 +906,19 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv);
void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p)
{
register const unsigned char* in asm ("r0") = in_p;
register unsigned char* out asm ("r1") = out_p;
register unsigned long len asm ("r2") = len_p;
register const unsigned char* ks asm ("r3") = ks_p;
register int nr asm ("r4") = nr_p;
register unsigned char* iv asm ("r5") = iv_p;
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
register unsigned long len asm ("r2") = (unsigned long)len_p;
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
register int nr asm ("r4") = (int)nr_p;
register unsigned char* iv asm ("r5") = (unsigned char*)iv_p;
register uint32_t* L_AES_ARM32_te_ecb_c asm ("r6") = (uint32_t*)L_AES_ARM32_te_ecb;
__asm__ __volatile__ (
"mov r8, r4\n\t"
"mov r9, r5\n\t"
"mov lr, %[in]\n\t"
"ldr r0, %[L_AES_ARM32_te_ecb]\n\t"
"mov r0, %[L_AES_ARM32_te_ecb]\n\t"
"ldm r9, {r4, r5, r6, r7}\n\t"
"push {%[ks], r9}\n\t"
"cmp r8, #10\n\t"
@ -1029,9 +1046,9 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"L_AES_CBC_encrypt_end_%=: \n\t"
"pop {%[ks], r9}\n\t"
"stm r9, {r4, r5, r6, r7}\n\t"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv)
: [L_AES_ARM32_te_ecb] "g" (L_AES_ARM32_te_ecb)
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c)
:
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
(void)iv;
@ -1042,18 +1059,19 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr);
void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p)
{
register const unsigned char* in asm ("r0") = in_p;
register unsigned char* out asm ("r1") = out_p;
register unsigned long len asm ("r2") = len_p;
register const unsigned char* ks asm ("r3") = ks_p;
register int nr asm ("r4") = nr_p;
register unsigned char* ctr asm ("r5") = ctr_p;
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
register unsigned long len asm ("r2") = (unsigned long)len_p;
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
register int nr asm ("r4") = (int)nr_p;
register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p;
register uint32_t* L_AES_ARM32_te_ecb_c asm ("r6") = (uint32_t*)L_AES_ARM32_te_ecb;
__asm__ __volatile__ (
"mov r12, r4\n\t"
"mov r8, r5\n\t"
"mov lr, %[in]\n\t"
"ldr r0, %[L_AES_ARM32_te_ecb]\n\t"
"mov r0, %[L_AES_ARM32_te_ecb]\n\t"
"ldm r8, {r4, r5, r6, r7}\n\t"
"rev r4, r4\n\t"
"rev r5, r5\n\t"
@ -1202,23 +1220,22 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"rev r6, r6\n\t"
"rev r7, r7\n\t"
"stm r8, {r4, r5, r6, r7}\n\t"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr)
: [L_AES_ARM32_te_ecb] "g" (L_AES_ARM32_te_ecb)
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c)
:
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
(void)ctr;
}
#endif /* WOLFSSL_AES_COUNTER */
#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#ifdef HAVE_AES_DECRYPT
#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC)
void AES_decrypt_block(const uint32_t* td, int nr);
void AES_decrypt_block(const uint32_t* td_p, int nr_p)
{
register const uint32_t* td asm ("r0") = td_p;
register int nr asm ("r1") = nr_p;
register const uint32_t* td asm ("r0") = (const uint32_t*)td_p;
register int nr asm ("r1") = (int)nr_p;
__asm__ __volatile__ (
"\n"
@ -1471,17 +1488,19 @@ static const unsigned char L_AES_ARM32_td4[] = {
void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr);
void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p)
{
register const unsigned char* in asm ("r0") = in_p;
register unsigned char* out asm ("r1") = out_p;
register unsigned long len asm ("r2") = len_p;
register const unsigned char* ks asm ("r3") = ks_p;
register int nr asm ("r4") = nr_p;
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
register unsigned long len asm ("r2") = (unsigned long)len_p;
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
register int nr asm ("r4") = (int)nr_p;
register uint32_t* L_AES_ARM32_td_ecb_c asm ("r5") = (uint32_t*)L_AES_ARM32_td_ecb;
register unsigned char* L_AES_ARM32_td4_c asm ("r6") = (unsigned char*)&L_AES_ARM32_td4;
__asm__ __volatile__ (
"mov r8, r4\n\t"
"mov lr, %[in]\n\t"
"ldr r0, %[L_AES_ARM32_td_ecb]\n\t"
"ldr r12, %[L_AES_ARM32_td4]\n\t"
"mov r0, %[L_AES_ARM32_td_ecb]\n\t"
"mov r12, %[L_AES_ARM32_td4]\n\t"
"cmp r8, #10\n\t"
"beq L_AES_ECB_decrypt_start_block_128_%=\n\t"
"cmp r8, #12\n\t"
@ -1590,9 +1609,9 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"bne L_AES_ECB_decrypt_loop_block_128_%=\n\t"
"\n"
"L_AES_ECB_decrypt_end_%=: \n\t"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr)
: [L_AES_ARM32_td_ecb] "g" (L_AES_ARM32_td_ecb), [L_AES_ARM32_td4] "g" (L_AES_ARM32_td4)
: "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c), [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c)
:
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
}
@ -1602,19 +1621,21 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv);
void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p)
{
register const unsigned char* in asm ("r0") = in_p;
register unsigned char* out asm ("r1") = out_p;
register unsigned long len asm ("r2") = len_p;
register const unsigned char* ks asm ("r3") = ks_p;
register int nr asm ("r4") = nr_p;
register unsigned char* iv asm ("r5") = iv_p;
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
register unsigned long len asm ("r2") = (unsigned long)len_p;
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
register int nr asm ("r4") = (int)nr_p;
register unsigned char* iv asm ("r5") = (unsigned char*)iv_p;
register uint32_t* L_AES_ARM32_td_ecb_c asm ("r6") = (uint32_t*)L_AES_ARM32_td_ecb;
register unsigned char* L_AES_ARM32_td4_c asm ("r7") = (unsigned char*)&L_AES_ARM32_td4;
__asm__ __volatile__ (
"mov r8, r4\n\t"
"mov r4, r5\n\t"
"mov lr, %[in]\n\t"
"ldr r0, %[L_AES_ARM32_td_ecb]\n\t"
"ldr r12, %[L_AES_ARM32_td4]\n\t"
"mov r0, %[L_AES_ARM32_td_ecb]\n\t"
"mov r12, %[L_AES_ARM32_td4]\n\t"
"push {%[ks]-r4}\n\t"
"cmp r8, #10\n\t"
"beq L_AES_CBC_decrypt_loop_block_128_%=\n\t"
@ -1992,9 +2013,9 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"\n"
"L_AES_CBC_decrypt_end_%=: \n\t"
"pop {%[ks]-r4}\n\t"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv)
: [L_AES_ARM32_td_ecb] "g" (L_AES_ARM32_td_ecb), [L_AES_ARM32_td4] "g" (L_AES_ARM32_td4)
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c), [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c)
:
: "memory", "r12", "lr", "r8", "r9", "r10", "r11"
);
(void)nr;
(void)iv;
@ -2014,13 +2035,14 @@ static const uint32_t L_GCM_gmult_len_r[] = {
void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len);
void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned char* data_p, unsigned long len_p)
{
register unsigned char* x asm ("r0") = x_p;
register const unsigned char** m asm ("r1") = m_p;
register const unsigned char* data asm ("r2") = data_p;
register unsigned long len asm ("r3") = len_p;
register unsigned char* x asm ("r0") = (unsigned char*)x_p;
register const unsigned char** m asm ("r1") = (const unsigned char**)m_p;
register const unsigned char* data asm ("r2") = (const unsigned char*)data_p;
register unsigned long len asm ("r3") = (unsigned long)len_p;
register uint32_t* L_GCM_gmult_len_r_c asm ("r4") = (uint32_t*)&L_GCM_gmult_len_r;
__asm__ __volatile__ (
"ldr lr, %[L_GCM_gmult_len_r]\n\t"
"mov lr, %[L_GCM_gmult_len_r]\n\t"
"\n"
"L_GCM_gmult_len_start_block_%=: \n\t"
"push {r3}\n\t"
@ -2568,9 +2590,9 @@ void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned
"subs %[len], %[len], #16\n\t"
"add %[data], %[data], #16\n\t"
"bne L_GCM_gmult_len_start_block_%=\n\t"
: [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len)
: [L_AES_ARM32_td_ecb] "g" (L_AES_ARM32_td_ecb), [L_AES_ARM32_td4] "g" (L_AES_ARM32_td4), [L_GCM_gmult_len_r] "g" (L_GCM_gmult_len_r)
: "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
: [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c)
:
: "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
);
}
@ -2578,18 +2600,19 @@ static const uint32_t* L_AES_ARM32_te_gcm = L_AES_ARM32_te_data;
void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr);
void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p)
{
register const unsigned char* in asm ("r0") = in_p;
register unsigned char* out asm ("r1") = out_p;
register unsigned long len asm ("r2") = len_p;
register const unsigned char* ks asm ("r3") = ks_p;
register int nr asm ("r4") = nr_p;
register unsigned char* ctr asm ("r5") = ctr_p;
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
register unsigned long len asm ("r2") = (unsigned long)len_p;
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
register int nr asm ("r4") = (int)nr_p;
register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p;
register uint32_t* L_AES_ARM32_te_gcm_c asm ("r6") = (uint32_t*)L_AES_ARM32_te_gcm;
__asm__ __volatile__ (
"mov r12, r4\n\t"
"mov r8, r5\n\t"
"mov lr, %[in]\n\t"
"ldr r0, %[L_AES_ARM32_te_gcm]\n\t"
"mov r0, %[L_AES_ARM32_te_gcm]\n\t"
"ldm r8, {r4, r5, r6, r7}\n\t"
"rev r4, r4\n\t"
"rev r5, r5\n\t"
@ -2729,9 +2752,9 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"rev r6, r6\n\t"
"rev r7, r7\n\t"
"stm r8, {r4, r5, r6, r7}\n\t"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr)
: [L_AES_ARM32_te_gcm] "g" (L_AES_ARM32_te_gcm)
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_ARM32_te_gcm] "+r" (L_AES_ARM32_te_gcm_c)
:
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
(void)ctr;

View File

@ -28,6 +28,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
@ -36,6 +37,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM_INLINE
/* Based on work by: Emil Lenngren
* https://github.com/pornin/X25519-Cortex-M4
@ -50,7 +52,6 @@
void fe_init()
{
__asm__ __volatile__ (
"\n\t"
:
@ -62,7 +63,6 @@ void fe_init()
void fe_add_sub_op(void);
void fe_add_sub_op()
{
__asm__ __volatile__ (
/* Add-Sub */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
@ -269,7 +269,6 @@ void fe_add_sub_op()
void fe_sub_op(void);
void fe_sub_op()
{
__asm__ __volatile__ (
/* Sub */
"ldm r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t"
@ -307,9 +306,9 @@ void fe_sub_op()
void fe_sub(fe r_p, const fe a_p, const fe b_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register const fe b asm ("r2") = b_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
register const sword32* b asm ("r2") = (const sword32*)b_p;
__asm__ __volatile__ (
"bl fe_sub_op\n\t"
@ -322,7 +321,6 @@ void fe_sub(fe r_p, const fe a_p, const fe b_p)
void fe_add_op(void);
void fe_add_op()
{
__asm__ __volatile__ (
/* Add */
"ldm r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t"
@ -361,9 +359,9 @@ void fe_add_op()
void fe_add(fe r_p, const fe a_p, const fe b_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register const fe b asm ("r2") = b_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
register const sword32* b asm ("r2") = (const sword32*)b_p;
__asm__ __volatile__ (
"bl fe_add_op\n\t"
@ -376,8 +374,8 @@ void fe_add(fe r_p, const fe a_p, const fe b_p)
#ifdef HAVE_ED25519
void fe_frombytes(fe out_p, const unsigned char* in_p)
{
register fe out asm ("r0") = out_p;
register const unsigned char* in asm ("r1") = in_p;
register sword32* out asm ("r0") = (sword32*)out_p;
register const unsigned char* in asm ("r1") = (const unsigned char*)in_p;
__asm__ __volatile__ (
"ldm %[in], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
@ -391,8 +389,8 @@ void fe_frombytes(fe out_p, const unsigned char* in_p)
void fe_tobytes(unsigned char* out_p, const fe n_p)
{
register unsigned char* out asm ("r0") = out_p;
register const fe n asm ("r1") = n_p;
register unsigned char* out asm ("r0") = (unsigned char*)out_p;
register const sword32* n asm ("r1") = (const sword32*)n_p;
__asm__ __volatile__ (
"ldm %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
@ -424,7 +422,7 @@ void fe_tobytes(unsigned char* out_p, const fe n_p)
void fe_1(fe n_p)
{
register fe n asm ("r0") = n_p;
register sword32* n asm ("r0") = (sword32*)n_p;
__asm__ __volatile__ (
/* Set one */
@ -463,7 +461,7 @@ void fe_1(fe n_p)
void fe_0(fe n_p)
{
register fe n asm ("r0") = n_p;
register sword32* n asm ("r0") = (sword32*)n_p;
__asm__ __volatile__ (
/* Set zero */
@ -501,8 +499,8 @@ void fe_0(fe n_p)
void fe_copy(fe r_p, const fe a_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
/* Copy */
@ -562,8 +560,8 @@ void fe_copy(fe r_p, const fe a_p)
void fe_neg(fe r_p, const fe a_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"mvn lr, #0\n\t"
@ -589,7 +587,7 @@ void fe_neg(fe r_p, const fe a_p)
int fe_isnonzero(const fe a_p)
{
register const fe a asm ("r0") = a_p;
register const sword32* a asm ("r0") = (const sword32*)a_p;
__asm__ __volatile__ (
"ldm %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
@ -628,7 +626,7 @@ int fe_isnonzero(const fe a_p)
int fe_isnegative(const fe a_p)
{
register const fe a asm ("r0") = a_p;
register const sword32* a asm ("r0") = (const sword32*)a_p;
__asm__ __volatile__ (
"ldm %[a]!, {r2, r3, r4, r5}\n\t"
@ -655,9 +653,9 @@ int fe_isnegative(const fe a_p)
#ifndef WC_NO_CACHE_RESISTANT
void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
{
register fe* r asm ("r0") = r_p;
register fe* base asm ("r1") = base_p;
register signed char b asm ("r2") = b_p;
register fe* r asm ("r0") = (fe*)r_p;
register fe* base asm ("r1") = (fe*)base_p;
register signed char b asm ("r2") = (signed char)b_p;
__asm__ __volatile__ (
"sxtb %[b], %[b]\n\t"
@ -2364,9 +2362,9 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
#else
void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
{
register fe* r asm ("r0") = r_p;
register fe* base asm ("r1") = base_p;
register signed char b asm ("r2") = b_p;
register fe* r asm ("r0") = (fe*)r_p;
register fe* base asm ("r1") = (fe*)base_p;
register signed char b asm ("r2") = (signed char)b_p;
__asm__ __volatile__ (
"sxtb %[b], %[b]\n\t"
@ -2472,7 +2470,6 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
void fe_mul_op(void);
void fe_mul_op()
{
__asm__ __volatile__ (
"sub sp, sp, #44\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
@ -2610,9 +2607,9 @@ void fe_mul_op()
void fe_mul(fe r_p, const fe a_p, const fe b_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register const fe b asm ("r2") = b_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
register const sword32* b asm ("r2") = (const sword32*)b_p;
__asm__ __volatile__ (
"bl fe_mul_op\n\t"
@ -2625,7 +2622,6 @@ void fe_mul(fe r_p, const fe a_p, const fe b_p)
void fe_sq_op(void);
void fe_sq_op()
{
__asm__ __volatile__ (
"sub sp, sp, #32\n\t"
"str r0, [sp, #28]\n\t"
@ -2749,8 +2745,8 @@ void fe_sq_op()
void fe_sq(fe r_p, const fe a_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"bl fe_sq_op\n\t"
@ -2762,8 +2758,8 @@ void fe_sq(fe r_p, const fe a_p)
void fe_mul121666(fe r_p, fe a_p)
{
register fe r asm ("r0") = r_p;
register fe a asm ("r1") = a_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register sword32* a asm ("r1") = (sword32*)a_p;
__asm__ __volatile__ (
/* Multiply by 121666 */
@ -2808,9 +2804,9 @@ void fe_mul121666(fe r_p, fe a_p)
#ifndef WC_NO_CACHE_RESISTANT
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
{
register byte* r asm ("r0") = r_p;
register const byte* n asm ("r1") = n_p;
register const byte* a asm ("r2") = a_p;
register byte* r asm ("r0") = (byte*)r_p;
register const byte* n asm ("r1") = (const byte*)n_p;
register const byte* a asm ("r2") = (const byte*)a_p;
__asm__ __volatile__ (
"sub sp, sp, #0xbc\n\t"
@ -3423,9 +3419,9 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
#else
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
{
register byte* r asm ("r0") = r_p;
register const byte* n asm ("r1") = n_p;
register const byte* a asm ("r2") = a_p;
register byte* r asm ("r0") = (byte*)r_p;
register const byte* n asm ("r1") = (const byte*)n_p;
register const byte* a asm ("r2") = (const byte*)a_p;
__asm__ __volatile__ (
"sub sp, sp, #0xc0\n\t"
@ -3802,8 +3798,8 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
#ifdef HAVE_ED25519
void fe_invert(fe r_p, const fe a_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"sub sp, sp, #0x88\n\t"
@ -3972,8 +3968,8 @@ void fe_invert(fe r_p, const fe a_p)
void fe_sq2(fe r_p, const fe a_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"sub sp, sp, #36\n\t"
@ -4138,8 +4134,8 @@ void fe_sq2(fe r_p, const fe a_p)
void fe_pow22523(fe r_p, const fe a_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"sub sp, sp, #0x68\n\t"
@ -4308,8 +4304,8 @@ void fe_pow22523(fe r_p, const fe a_p)
void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p)
{
register ge_p2 * r asm ("r0") = r_p;
register const ge_p1p1 * p asm ("r1") = p_p;
register ge_p2 * r asm ("r0") = (ge_p2 *)r_p;
register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p;
__asm__ __volatile__ (
"sub sp, sp, #8\n\t"
@ -4338,8 +4334,8 @@ void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p)
void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p)
{
register ge_p3 * r asm ("r0") = r_p;
register const ge_p1p1 * p asm ("r1") = p_p;
register ge_p3 * r asm ("r0") = (ge_p3 *)r_p;
register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p;
__asm__ __volatile__ (
"sub sp, sp, #8\n\t"
@ -4373,8 +4369,8 @@ void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p)
void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p)
{
register ge_p1p1 * r asm ("r0") = r_p;
register const ge_p2 * p asm ("r1") = p_p;
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p2 * p asm ("r1") = (const ge_p2 *)p_p;
__asm__ __volatile__ (
"sub sp, sp, #8\n\t"
@ -4420,9 +4416,9 @@ void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p)
void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
{
register ge_p1p1 * r asm ("r0") = r_p;
register const ge_p3 * p asm ("r1") = p_p;
register const ge_precomp * q asm ("r2") = q_p;
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p;
__asm__ __volatile__ (
"sub sp, sp, #12\n\t"
@ -4502,9 +4498,9 @@ void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
{
register ge_p1p1 * r asm ("r0") = r_p;
register const ge_p3 * p asm ("r1") = p_p;
register const ge_precomp * q asm ("r2") = q_p;
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p;
__asm__ __volatile__ (
"sub sp, sp, #12\n\t"
@ -4585,9 +4581,9 @@ void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
{
register ge_p1p1 * r asm ("r0") = r_p;
register const ge_p3 * p asm ("r1") = p_p;
register const ge_cached* q asm ("r2") = q_p;
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_cached* q asm ("r2") = (const ge_cached*)q_p;
__asm__ __volatile__ (
"sub sp, sp, #44\n\t"
@ -4668,9 +4664,9 @@ void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
{
register ge_p1p1 * r asm ("r0") = r_p;
register const ge_p3 * p asm ("r1") = p_p;
register const ge_cached* q asm ("r2") = q_p;
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_cached* q asm ("r2") = (const ge_cached*)q_p;
__asm__ __volatile__ (
"sub sp, sp, #44\n\t"
@ -4751,7 +4747,7 @@ void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
void sc_reduce(byte* s_p)
{
register byte* s asm ("r0") = s_p;
register byte* s asm ("r0") = (byte*)s_p;
__asm__ __volatile__ (
"sub sp, sp, #52\n\t"
@ -5163,10 +5159,10 @@ void sc_reduce(byte* s_p)
void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
{
register byte* s asm ("r0") = s_p;
register const byte* a asm ("r1") = a_p;
register const byte* b asm ("r2") = b_p;
register const byte* c asm ("r3") = c_p;
register byte* s asm ("r0") = (byte*)s_p;
register const byte* a asm ("r1") = (const byte*)a_p;
register const byte* b asm ("r2") = (const byte*)b_p;
register const byte* c asm ("r3") = (const byte*)c_p;
__asm__ __volatile__ (
"sub sp, sp, #0x50\n\t"

View File

@ -28,6 +28,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
@ -36,6 +37,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM_INLINE
#ifndef NO_SHA256
#include <wolfssl/wolfcrypt/sha256.h>
@ -63,13 +65,13 @@ static const uint32_t L_SHA256_transform_len_k[] = {
void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len);
void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
{
register wc_Sha256* sha256 asm ("r0") = sha256_p;
register const byte* data asm ("r1") = data_p;
register word32 len asm ("r2") = len_p;
register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p;
register const byte* data asm ("r1") = (const byte*)data_p;
register word32 len asm ("r2") = (word32)len_p;
register uint32_t* L_SHA256_transform_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_len_k;
__asm__ __volatile__ (
"sub sp, sp, #0xc0\n\t"
"mov r3, %[L_SHA256_transform_len_k]\n\t"
/* Copy digest to add in at end */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[sha256]]\n\t"
@ -1587,9 +1589,9 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"add %[data], %[data], #0x40\n\t"
"bne L_SHA256_transform_len_begin_%=\n\t"
"add sp, sp, #0xc0\n\t"
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len)
: [L_SHA256_transform_len_k] "g" (L_SHA256_transform_len_k)
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
);
}
@ -1619,9 +1621,10 @@ static const uint32_t L_SHA256_transform_neon_len_k[] = {
void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len);
void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
{
register wc_Sha256* sha256 asm ("r0") = sha256_p;
register const byte* data asm ("r1") = data_p;
register word32 len asm ("r2") = len_p;
register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p;
register const byte* data asm ("r1") = (const byte*)data_p;
register word32 len asm ("r2") = (word32)len_p;
register uint32_t* L_SHA256_transform_neon_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_neon_len_k;
__asm__ __volatile__ (
"sub sp, sp, #24\n\t"
@ -2648,9 +2651,9 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"str r10, [sp, #8]\n\t"
"bne L_SHA256_transform_neon_len_begin_%=\n\t"
"add sp, sp, #24\n\t"
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len)
: [L_SHA256_transform_neon_len_k] "g" (L_SHA256_transform_neon_len_k)
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11"
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_neon_len_k] "+r" (L_SHA256_transform_neon_len_k_c)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11"
);
}

View File

@ -7679,6 +7679,7 @@ L_SHA512_transform_neon_len_k:
.type Transform_Sha512_Len, %function
Transform_Sha512_Len:
vpush {d8-d15}
adr r3, L_SHA512_transform_neon_len_k
# Load digest into working vars
vldm.64 r0, {d0-d7}
# Start of loop processing a block
@ -7715,7 +7716,6 @@ L_SHA512_transform_neon_len_begin:
vrev64.8 d30, d30
vrev64.8 d31, d31
#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */
adr r3, L_SHA512_transform_neon_len_k
mov r12, #4
# Start of 16 rounds
L_SHA512_transform_neon_len_start:
@ -9164,6 +9164,7 @@ L_SHA512_transform_neon_len_start:
#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */
vstm.64 r0, {d0-d7}
subs r2, r2, #0x80
sub r3, r3, #0x280
bne L_SHA512_transform_neon_len_begin
vpop {d8-d15}
bx lr

View File

@ -28,6 +28,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
@ -36,64 +37,65 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_SHA512
#include <wolfssl/wolfcrypt/sha512.h>
#ifdef WOLFSSL_ARMASM_NO_NEON
static const uint64_t L_SHA512_transform_len_k[] = {
0x428a2f98d728ae22, 0x7137449123ef65cd,
0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
0x3956c25bf348b538, 0x59f111f1b605d019,
0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
0xd807aa98a3030242, 0x12835b0145706fbe,
0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
0x9bdc06a725c71235, 0xc19bf174cf692694,
0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
0x983e5152ee66dfab, 0xa831c66d2db43210,
0xb00327c898fb213f, 0xbf597fc7beef0ee4,
0xc6e00bf33da88fc2, 0xd5a79147930aa725,
0x06ca6351e003826f, 0x142929670a0e6e70,
0x27b70a8546d22ffc, 0x2e1b21385c26c926,
0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
0x650a73548baf63de, 0x766a0abb3c77b2a8,
0x81c2c92e47edaee6, 0x92722c851482353b,
0xa2bfe8a14cf10364, 0xa81a664bbc423001,
0xc24b8b70d0f89791, 0xc76c51a30654be30,
0xd192e819d6ef5218, 0xd69906245565a910,
0xf40e35855771202a, 0x106aa07032bbd1b8,
0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
0x748f82ee5defb2fc, 0x78a5636f43172f60,
0x84c87814a1f0ab72, 0x8cc702081a6439ec,
0x90befffa23631e28, 0xa4506cebde82bde9,
0xbef9a3f7b2c67915, 0xc67178f2e372532b,
0xca273eceea26619c, 0xd186b8c721c0c207,
0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
0x06f067aa72176fba, 0x0a637dc5a2c898a6,
0x113f9804bef90dae, 0x1b710b35131c471b,
0x28db77f523047d84, 0x32caab7b40c72493,
0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
0xd192e819d6ef5218UL, 0xd69906245565a910UL,
0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
0x28db77f523047d84UL, 0x32caab7b40c72493UL,
0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
};
void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len);
void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
{
register wc_Sha512* sha512 asm ("r0") = sha512_p;
register const byte* data asm ("r1") = data_p;
register word32 len asm ("r2") = len_p;
register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p;
register const byte* data asm ("r1") = (const byte*)data_p;
register word32 len asm ("r2") = (word32)len_p;
register uint64_t* L_SHA512_transform_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_len_k;
__asm__ __volatile__ (
"sub sp, sp, #0xc0\n\t"
"mov r3, %[L_SHA512_transform_len_k]\n\t"
/* Copy digest to add in at end */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[sha512]]\n\t"
@ -7392,9 +7394,9 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
"bne L_SHA512_transform_len_begin_%=\n\t"
"eor r0, r0, r0\n\t"
"add sp, sp, #0xc0\n\t"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
: [L_SHA512_transform_len_k] "g" (L_SHA512_transform_len_k)
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
);
}
@ -7403,54 +7405,55 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
#ifndef WOLFSSL_ARMASM_NO_NEON
static const uint64_t L_SHA512_transform_neon_len_k[] = {
0x428a2f98d728ae22, 0x7137449123ef65cd,
0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
0x3956c25bf348b538, 0x59f111f1b605d019,
0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
0xd807aa98a3030242, 0x12835b0145706fbe,
0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
0x9bdc06a725c71235, 0xc19bf174cf692694,
0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
0x983e5152ee66dfab, 0xa831c66d2db43210,
0xb00327c898fb213f, 0xbf597fc7beef0ee4,
0xc6e00bf33da88fc2, 0xd5a79147930aa725,
0x06ca6351e003826f, 0x142929670a0e6e70,
0x27b70a8546d22ffc, 0x2e1b21385c26c926,
0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
0x650a73548baf63de, 0x766a0abb3c77b2a8,
0x81c2c92e47edaee6, 0x92722c851482353b,
0xa2bfe8a14cf10364, 0xa81a664bbc423001,
0xc24b8b70d0f89791, 0xc76c51a30654be30,
0xd192e819d6ef5218, 0xd69906245565a910,
0xf40e35855771202a, 0x106aa07032bbd1b8,
0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
0x748f82ee5defb2fc, 0x78a5636f43172f60,
0x84c87814a1f0ab72, 0x8cc702081a6439ec,
0x90befffa23631e28, 0xa4506cebde82bde9,
0xbef9a3f7b2c67915, 0xc67178f2e372532b,
0xca273eceea26619c, 0xd186b8c721c0c207,
0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
0x06f067aa72176fba, 0x0a637dc5a2c898a6,
0x113f9804bef90dae, 0x1b710b35131c471b,
0x28db77f523047d84, 0x32caab7b40c72493,
0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
0xd192e819d6ef5218UL, 0xd69906245565a910UL,
0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
0x28db77f523047d84UL, 0x32caab7b40c72493UL,
0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
};
void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len);
void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
{
register wc_Sha512* sha512 asm ("r0") = sha512_p;
register const byte* data asm ("r1") = data_p;
register word32 len asm ("r2") = len_p;
register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p;
register const byte* data asm ("r1") = (const byte*)data_p;
register word32 len asm ("r2") = (word32)len_p;
register uint64_t* L_SHA512_transform_neon_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_neon_len_k;
__asm__ __volatile__ (
/* Load digest into working vars */
@ -7490,7 +7493,6 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
"vrev64.8 d30, d30\n\t"
"vrev64.8 d31, d31\n\t"
#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */
"mov r3, %[L_SHA512_transform_neon_len_k]\n\t"
"mov r12, #4\n\t"
/* Start of 16 rounds */
"\n"
@ -8940,10 +8942,11 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */
"vstm.64 %[sha512], {d0-d7}\n\t"
"subs %[len], %[len], #0x80\n\t"
"sub r3, r3, #0x280\n\t"
"bne L_SHA512_transform_neon_len_begin_%=\n\t"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
: [L_SHA512_transform_neon_len_k] "g" (L_SHA512_transform_neon_len_k)
: "memory", "r3", "r12", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_neon_len_k] "+r" (L_SHA512_transform_neon_len_k_c)
:
: "memory", "r12", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}

View File

@ -31,6 +31,7 @@
#endif
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#if !defined(NO_AES) && defined(WOLFSSL_ARMASM)
@ -41,7 +42,6 @@
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
#include <wolfssl/wolfcrypt/aes.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/logging.h>
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
@ -5467,7 +5467,6 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
#else /* !WOLFSSL_ARMASM_NO_HW_CRYPTO */
#include <wolfssl/wolfcrypt/logging.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/aes.h>
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>

View File

@ -23,6 +23,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
/* Generated using (from wolfssl):
* cd ../scripts
@ -6312,9 +6313,9 @@ void ge_msub(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
__asm__ __volatile__ (
"stp x29, x30, [sp, #-48]!\n\t"
"add x29, sp, #0\n\t"
"str %w[r], [x29, #16]\n\t"
"str %w[p], [x29, #24]\n\t"
"str %w[q], [x29, #32]\n\t"
"str %x[r], [x29, #16]\n\t"
"str %x[p], [x29, #24]\n\t"
"str %x[q], [x29, #32]\n\t"
"mov x3, x1\n\t"
"add x2, x1, #32\n\t"
"add x1, x0, #32\n\t"
@ -6808,9 +6809,9 @@ void ge_add(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
__asm__ __volatile__ (
"stp x29, x30, [sp, #-48]!\n\t"
"add x29, sp, #0\n\t"
"str %w[r], [x29, #16]\n\t"
"str %w[p], [x29, #24]\n\t"
"str %w[q], [x29, #32]\n\t"
"str %x[r], [x29, #16]\n\t"
"str %x[p], [x29, #24]\n\t"
"str %x[q], [x29, #32]\n\t"
"mov x3, x1\n\t"
"add x2, x1, #32\n\t"
"add x1, x0, #32\n\t"
@ -7430,9 +7431,9 @@ void ge_sub(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
__asm__ __volatile__ (
"stp x29, x30, [sp, #-48]!\n\t"
"add x29, sp, #0\n\t"
"str %w[r], [x29, #16]\n\t"
"str %w[p], [x29, #24]\n\t"
"str %w[q], [x29, #32]\n\t"
"str %x[r], [x29, #16]\n\t"
"str %x[p], [x29, #24]\n\t"
"str %x[q], [x29, #32]\n\t"
"mov x3, x1\n\t"
"add x2, x1, #32\n\t"
"add x1, x0, #32\n\t"

View File

@ -23,6 +23,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
/* Generated using (from wolfssl):
* cd ../scripts

View File

@ -23,6 +23,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
/* Generated using (from wolfssl):
* cd ../scripts

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -280,13 +280,14 @@ fe_1:
# Set one
MOV r2, #0x1
MOV r3, #0x0
STRD r2, r3, [r0]
STM r0!, {r2, r3}
MOV r2, #0x0
STRD r2, r3, [r0, #8]
STRD r2, r3, [r0, #16]
STRD r2, r3, [r0, #24]
STM r0!, {r2, r3}
STM r0!, {r2, r3}
STM r0!, {r2, r3}
SUB r0, r0, #0x20
BX lr
# Cycle Count = 19
# Cycle Count = 20
.size fe_1,.-fe_1
.text
.align 4
@ -296,12 +297,13 @@ fe_0:
# Set zero
MOV r2, #0x0
MOV r3, #0x0
STRD r2, r3, [r0]
STRD r2, r3, [r0, #8]
STRD r2, r3, [r0, #16]
STRD r2, r3, [r0, #24]
STM r0!, {r2, r3}
STM r0!, {r2, r3}
STM r0!, {r2, r3}
STM r0!, {r2, r3}
SUB r0, r0, #0x20
BX lr
# Cycle Count = 18
# Cycle Count = 19
.size fe_0,.-fe_0
.text
.align 4
@ -1751,6 +1753,7 @@ fe_sq:
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
# Cycle Count = 24
.size fe_sq,.-fe_sq
#ifdef HAVE_CURVE25519
.text
.align 4
.globl fe_mul121666
@ -1803,26 +1806,31 @@ curve25519:
# Set one
MOV r10, #0x1
MOV r11, #0x0
STRD r10, r11, [r0]
STM r0!, {r10, r11}
MOV r10, #0x0
STRD r10, r11, [r0, #8]
STRD r10, r11, [r0, #16]
STRD r10, r11, [r0, #24]
STM r0!, {r10, r11}
STM r0!, {r10, r11}
STM r0!, {r10, r11}
SUB r0, r0, #0x20
MOV r3, sp
# Set zero
MOV r10, #0x0
MOV r11, #0x0
STRD r10, r11, [sp]
STRD r10, r11, [sp, #8]
STRD r10, r11, [sp, #16]
STRD r10, r11, [sp, #24]
STM r3!, {r10, r11}
STM r3!, {r10, r11}
STM r3!, {r10, r11}
STM r3!, {r10, r11}
SUB r3, r3, #0x20
ADD r3, sp, #0x20
# Set one
MOV r10, #0x1
MOV r11, #0x0
STRD r10, r11, [sp, #32]
STM r3!, {r10, r11}
MOV r10, #0x0
STRD r10, r11, [sp, #40]
STRD r10, r11, [sp, #48]
STRD r10, r11, [sp, #56]
STM r3!, {r10, r11}
STM r3!, {r10, r11}
STM r3!, {r10, r11}
SUB r3, r3, #0x20
ADD r3, sp, #0x40
# Copy
LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}
@ -1845,8 +1853,10 @@ L_curve25519_bits:
LDR r0, [sp, #160]
# Conditional Swap
RSB r1, r1, #0x0
LDRD r4, r5, [r0]
LDRD r6, r7, [sp, #64]
MOV r3, r0
ADD r12, sp, #0x40
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1855,10 +1865,10 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [r0]
STRD r6, r7, [sp, #64]
LDRD r4, r5, [r0, #8]
LDRD r6, r7, [sp, #72]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1867,10 +1877,10 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [r0, #8]
STRD r6, r7, [sp, #72]
LDRD r4, r5, [r0, #16]
LDRD r6, r7, [sp, #80]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1879,10 +1889,10 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [r0, #16]
STRD r6, r7, [sp, #80]
LDRD r4, r5, [r0, #24]
LDRD r6, r7, [sp, #88]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1891,13 +1901,15 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [r0, #24]
STRD r6, r7, [sp, #88]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDR r1, [sp, #172]
# Conditional Swap
RSB r1, r1, #0x0
LDRD r4, r5, [sp]
LDRD r6, r7, [sp, #32]
MOV r3, sp
ADD r12, sp, #0x20
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1906,10 +1918,10 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [sp]
STRD r6, r7, [sp, #32]
LDRD r4, r5, [sp, #8]
LDRD r6, r7, [sp, #40]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1918,10 +1930,10 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [sp, #8]
STRD r6, r7, [sp, #40]
LDRD r4, r5, [sp, #16]
LDRD r6, r7, [sp, #48]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1930,10 +1942,10 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [sp, #16]
STRD r6, r7, [sp, #48]
LDRD r4, r5, [sp, #24]
LDRD r6, r7, [sp, #56]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1942,8 +1954,8 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [sp, #24]
STRD r6, r7, [sp, #56]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDR r1, [sp, #184]
STR r1, [sp, #172]
MOV r3, sp
@ -2165,7 +2177,7 @@ L_curve25519_inv_8:
MOV r0, #0x0
ADD sp, sp, #0xbc
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
# Cycle Count = 684
# Cycle Count = 693
.size curve25519,.-curve25519
#else
.text
@ -2188,26 +2200,31 @@ curve25519:
# Set one
MOV r10, #0x1
MOV r11, #0x0
STRD r10, r11, [r0]
STM r0!, {r10, r11}
MOV r10, #0x0
STRD r10, r11, [r0, #8]
STRD r10, r11, [r0, #16]
STRD r10, r11, [r0, #24]
STM r0!, {r10, r11}
STM r0!, {r10, r11}
STM r0!, {r10, r11}
SUB r0, r0, #0x20
MOV r3, sp
# Set zero
MOV r10, #0x0
MOV r11, #0x0
STRD r10, r11, [sp]
STRD r10, r11, [sp, #8]
STRD r10, r11, [sp, #16]
STRD r10, r11, [sp, #24]
STM r3!, {r10, r11}
STM r3!, {r10, r11}
STM r3!, {r10, r11}
STM r3!, {r10, r11}
SUB r3, r3, #0x20
ADD r3, sp, #0x20
# Set one
MOV r10, #0x1
MOV r11, #0x0
STRD r10, r11, [sp, #32]
STM r3!, {r10, r11}
MOV r10, #0x0
STRD r10, r11, [sp, #40]
STRD r10, r11, [sp, #48]
STRD r10, r11, [sp, #56]
STM r3!, {r10, r11}
STM r3!, {r10, r11}
STM r3!, {r10, r11}
SUB r3, r3, #0x20
ADD r3, sp, #0x40
# Copy
LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}
@ -2470,9 +2487,10 @@ L_curve25519_inv_8:
MOV r0, #0x0
ADD sp, sp, #0xc0
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
# Cycle Count = 595
# Cycle Count = 600
.size curve25519,.-curve25519
#endif /* WC_NO_CACHE_RESISTANT */
#endif /* HAVE_CURVE25519 */
#ifdef HAVE_ED25519
.text
.align 4

View File

@ -48,7 +48,7 @@
#if defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
#if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL)
void fe_init(void)
void fe_init()
{
__asm__ __volatile__ (
"\n\t"
@ -59,7 +59,7 @@ void fe_init(void)
}
void fe_add_sub_op(void);
void fe_add_sub_op(void)
void fe_add_sub_op()
{
__asm__ __volatile__ (
/* Add-Sub */
@ -156,7 +156,7 @@ void fe_add_sub_op(void)
}
void fe_sub_op(void);
void fe_sub_op(void)
void fe_sub_op()
{
__asm__ __volatile__ (
/* Sub */
@ -190,18 +190,22 @@ void fe_sub_op(void)
);
}
void fe_sub(fe r, const fe a, const fe b)
void fe_sub(fe r_p, const fe a_p, const fe b_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
register const sword32* b asm ("r2") = (const sword32*)b_p;
__asm__ __volatile__ (
"BL fe_sub_op\n\t"
: [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void fe_add_op(void);
void fe_add_op(void)
void fe_add_op()
{
__asm__ __volatile__ (
/* Add */
@ -235,31 +239,41 @@ void fe_add_op(void)
);
}
void fe_add(fe r, const fe a, const fe b)
void fe_add(fe r_p, const fe a_p, const fe b_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
register const sword32* b asm ("r2") = (const sword32*)b_p;
__asm__ __volatile__ (
"BL fe_add_op\n\t"
: [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
#ifdef HAVE_ED25519
void fe_frombytes(fe out, const unsigned char* in)
void fe_frombytes(fe out_p, const unsigned char* in_p)
{
register sword32* out asm ("r0") = (sword32*)out_p;
register const unsigned char* in asm ("r1") = (const unsigned char*)in_p;
__asm__ __volatile__ (
"LDM %[in], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
"BFC r9, #31, #1\n\t"
"STM %[out], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
: [out] "+l" (out), [in] "+l" (in)
: [out] "+r" (out), [in] "+r" (in)
:
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
);
}
void fe_tobytes(unsigned char* out, const fe n)
void fe_tobytes(unsigned char* out_p, const fe n_p)
{
register unsigned char* out asm ("r0") = (unsigned char*)out_p;
register const sword32* n asm ("r1") = (const sword32*)n_p;
__asm__ __volatile__ (
"LDM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
"ADDS r10, r2, #0x13\n\t"
@ -282,47 +296,56 @@ void fe_tobytes(unsigned char* out, const fe n)
"ADC r9, r9, #0x0\n\t"
"BFC r9, #31, #1\n\t"
"STM %[out], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
: [out] "+l" (out), [n] "+l" (n)
: [out] "+r" (out), [n] "+r" (n)
:
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
);
}
void fe_1(fe n)
void fe_1(fe n_p)
{
register sword32* n asm ("r0") = (sword32*)n_p;
__asm__ __volatile__ (
/* Set one */
"MOV r2, #0x1\n\t"
"MOV r3, #0x0\n\t"
"STRD r2, r3, [%[n]]\n\t"
"STM %[n]!, {r2, r3}\n\t"
"MOV r2, #0x0\n\t"
"STRD r2, r3, [%[n], #8]\n\t"
"STRD r2, r3, [%[n], #16]\n\t"
"STRD r2, r3, [%[n], #24]\n\t"
: [n] "+l" (n)
"STM %[n]!, {r2, r3}\n\t"
"STM %[n]!, {r2, r3}\n\t"
"STM %[n]!, {r2, r3}\n\t"
"SUB %[n], %[n], #0x20\n\t"
: [n] "+r" (n)
:
: "memory", "r2", "r3"
);
}
void fe_0(fe n)
void fe_0(fe n_p)
{
register sword32* n asm ("r0") = (sword32*)n_p;
__asm__ __volatile__ (
/* Set zero */
"MOV r2, #0x0\n\t"
"MOV r3, #0x0\n\t"
"STRD r2, r3, [%[n]]\n\t"
"STRD r2, r3, [%[n], #8]\n\t"
"STRD r2, r3, [%[n], #16]\n\t"
"STRD r2, r3, [%[n], #24]\n\t"
: [n] "+l" (n)
"STM %[n]!, {r2, r3}\n\t"
"STM %[n]!, {r2, r3}\n\t"
"STM %[n]!, {r2, r3}\n\t"
"STM %[n]!, {r2, r3}\n\t"
"SUB %[n], %[n], #0x20\n\t"
: [n] "+r" (n)
:
: "memory", "r2", "r3"
);
}
void fe_copy(fe r, const fe a)
void fe_copy(fe r_p, const fe a_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
/* Copy */
"LDRD r2, r3, [%[a]]\n\t"
@ -333,14 +356,17 @@ void fe_copy(fe r, const fe a)
"LDRD r4, r5, [%[a], #24]\n\t"
"STRD r2, r3, [%[r], #16]\n\t"
"STRD r4, r5, [%[r], #24]\n\t"
: [r] "+l" (r), [a] "+l" (a)
: [r] "+r" (r), [a] "+r" (a)
:
: "memory", "r2", "r3", "r4", "r5"
);
}
void fe_neg(fe r, const fe a)
void fe_neg(fe r_p, const fe a_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"MVN r7, #0x0\n\t"
"MVN r6, #0x12\n\t"
@ -357,14 +383,16 @@ void fe_neg(fe r, const fe a)
"SBCS r4, r7, r4\n\t"
"SBC r5, r6, r5\n\t"
"STM %[r]!, {r2, r3, r4, r5}\n\t"
: [r] "+l" (r), [a] "+l" (a)
: [r] "+r" (r), [a] "+r" (a)
:
: "memory", "r2", "r3", "r4", "r5", "r6", "r7"
);
}
int fe_isnonzero(const fe a)
int fe_isnonzero(const fe a_p)
{
register const sword32* a asm ("r0") = (const sword32*)a_p;
__asm__ __volatile__ (
"LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
"ADDS r1, r2, #0x13\n\t"
@ -393,15 +421,17 @@ int fe_isnonzero(const fe a)
"ORR r4, r4, r6\n\t"
"ORR r2, r2, r8\n\t"
"ORR %[a], r2, r4\n\t"
: [a] "+l" (a)
: [a] "+r" (a)
:
: "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
);
return (uint32_t)(size_t)a;
}
int fe_isnegative(const fe a)
int fe_isnegative(const fe a_p)
{
register const sword32* a asm ("r0") = (const sword32*)a_p;
__asm__ __volatile__ (
"LDM %[a]!, {r2, r3, r4, r5}\n\t"
"ADDS r1, r2, #0x13\n\t"
@ -417,7 +447,7 @@ int fe_isnegative(const fe a)
"AND %[a], r2, #0x1\n\t"
"LSR r1, r1, #31\n\t"
"EOR %[a], %[a], r1\n\t"
: [a] "+l" (a)
: [a] "+r" (a)
:
: "memory", "r1", "r2", "r3", "r4", "r5"
);
@ -425,8 +455,12 @@ int fe_isnegative(const fe a)
}
#ifndef WC_NO_CACHE_RESISTANT
void fe_cmov_table(fe* r, fe* base, signed char b)
void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
{
register fe* r asm ("r0") = (fe*)r_p;
register fe* base asm ("r1") = (fe*)base_p;
register signed char b asm ("r2") = (signed char)b_p;
__asm__ __volatile__ (
"SXTB %[b], %[b]\n\t"
"SBFX r3, %[b], #7, #1\n\t"
@ -1391,15 +1425,19 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
"STRD r4, r5, [%[r], #24]\n\t"
"STRD r6, r7, [%[r], #56]\n\t"
"STRD r8, r9, [%[r], #88]\n\t"
: [r] "+l" (r), [base] "+l" (base), [b] "+l" (b)
: [r] "+r" (r), [base] "+r" (base), [b] "+r" (b)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r3", "r10", "r11", "r12", "lr"
);
}
#else
void fe_cmov_table(fe* r, fe* base, signed char b)
void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
{
register fe* r asm ("r0") = (fe*)r_p;
register fe* base asm ("r1") = (fe*)base_p;
register signed char b asm ("r2") = (signed char)b_p;
__asm__ __volatile__ (
"SXTB %[b], %[b]\n\t"
"SBFX r3, %[b], #7, #1\n\t"
@ -1493,7 +1531,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
"AND r7, r7, lr\n\t"
"STM %[r]!, {r4, r5, r6, r7}\n\t"
"SUB %[base], %[base], %[b]\n\t"
: [r] "+l" (r), [base] "+l" (base), [b] "+l" (b)
: [r] "+r" (r), [base] "+r" (base), [b] "+r" (b)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
@ -1502,7 +1540,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
#endif /* WC_NO_CACHE_RESISTANT */
#endif /* HAVE_ED25519 */
void fe_mul_op(void);
void fe_mul_op(void)
void fe_mul_op()
{
__asm__ __volatile__ (
"SUB sp, sp, #0x2c\n\t"
@ -1634,18 +1672,22 @@ void fe_mul_op(void)
);
}
void fe_mul(fe r, const fe a, const fe b)
void fe_mul(fe r_p, const fe a_p, const fe b_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
register const sword32* b asm ("r2") = (const sword32*)b_p;
__asm__ __volatile__ (
"BL fe_mul_op\n\t"
: [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void fe_sq_op(void);
void fe_sq_op(void)
void fe_sq_op()
{
__asm__ __volatile__ (
"SUB sp, sp, #0x20\n\t"
@ -1763,18 +1805,25 @@ void fe_sq_op(void)
);
}
void fe_sq(fe r, const fe a)
void fe_sq(fe r_p, const fe a_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"BL fe_sq_op\n\t"
: [r] "+l" (r), [a] "+l" (a)
: [r] "+r" (r), [a] "+r" (a)
:
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void fe_mul121666(fe r, fe a)
#ifdef HAVE_CURVE25519
void fe_mul121666(fe r_p, fe a_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register sword32* a asm ("r1") = (sword32*)a_p;
__asm__ __volatile__ (
/* Multiply by 121666 */
"LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
@ -1803,15 +1852,19 @@ void fe_mul121666(fe r, fe a)
"ADCS r8, r8, #0x0\n\t"
"ADC r9, r9, #0x0\n\t"
"STM %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
: [r] "+l" (r), [a] "+l" (a)
: [r] "+r" (r), [a] "+r" (a)
:
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
);
}
#ifndef WC_NO_CACHE_RESISTANT
int curve25519(byte* r, const byte* n, const byte* a)
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
{
register byte* r asm ("r0") = (byte*)r_p;
register const byte* n asm ("r1") = (const byte*)n_p;
register const byte* a asm ("r2") = (const byte*)a_p;
__asm__ __volatile__ (
"SUB sp, sp, #0xbc\n\t"
"STR %[r], [sp, #160]\n\t"
@ -1822,26 +1875,31 @@ int curve25519(byte* r, const byte* n, const byte* a)
/* Set one */
"MOV r10, #0x1\n\t"
"MOV r11, #0x0\n\t"
"STRD r10, r11, [%[r]]\n\t"
"STM %[r]!, {r10, r11}\n\t"
"MOV r10, #0x0\n\t"
"STRD r10, r11, [%[r], #8]\n\t"
"STRD r10, r11, [%[r], #16]\n\t"
"STRD r10, r11, [%[r], #24]\n\t"
"STM %[r]!, {r10, r11}\n\t"
"STM %[r]!, {r10, r11}\n\t"
"STM %[r]!, {r10, r11}\n\t"
"SUB %[r], %[r], #0x20\n\t"
"MOV r3, sp\n\t"
/* Set zero */
"MOV r10, #0x0\n\t"
"MOV r11, #0x0\n\t"
"STRD r10, r11, [sp]\n\t"
"STRD r10, r11, [sp, #8]\n\t"
"STRD r10, r11, [sp, #16]\n\t"
"STRD r10, r11, [sp, #24]\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"SUB r3, r3, #0x20\n\t"
"ADD r3, sp, #0x20\n\t"
/* Set one */
"MOV r10, #0x1\n\t"
"MOV r11, #0x0\n\t"
"STRD r10, r11, [sp, #32]\n\t"
"STM r3!, {r10, r11}\n\t"
"MOV r10, #0x0\n\t"
"STRD r10, r11, [sp, #40]\n\t"
"STRD r10, r11, [sp, #48]\n\t"
"STRD r10, r11, [sp, #56]\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"SUB r3, r3, #0x20\n\t"
"ADD r3, sp, #0x40\n\t"
/* Copy */
"LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
@ -1866,8 +1924,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
"LDR %[r], [sp, #160]\n\t"
/* Conditional Swap */
"RSB %[n], %[n], #0x0\n\t"
"LDRD r4, r5, [%[r]]\n\t"
"LDRD r6, r7, [sp, #64]\n\t"
"MOV r3, r0\n\t"
"ADD r12, sp, #0x40\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1876,10 +1936,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [%[r]]\n\t"
"STRD r6, r7, [sp, #64]\n\t"
"LDRD r4, r5, [%[r], #8]\n\t"
"LDRD r6, r7, [sp, #72]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1888,10 +1948,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [%[r], #8]\n\t"
"STRD r6, r7, [sp, #72]\n\t"
"LDRD r4, r5, [%[r], #16]\n\t"
"LDRD r6, r7, [sp, #80]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1900,10 +1960,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [%[r], #16]\n\t"
"STRD r6, r7, [sp, #80]\n\t"
"LDRD r4, r5, [%[r], #24]\n\t"
"LDRD r6, r7, [sp, #88]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1912,13 +1972,15 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [%[r], #24]\n\t"
"STRD r6, r7, [sp, #88]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDR %[n], [sp, #172]\n\t"
/* Conditional Swap */
"RSB %[n], %[n], #0x0\n\t"
"LDRD r4, r5, [sp]\n\t"
"LDRD r6, r7, [sp, #32]\n\t"
"MOV r3, sp\n\t"
"ADD r12, sp, #0x20\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1927,10 +1989,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [sp]\n\t"
"STRD r6, r7, [sp, #32]\n\t"
"LDRD r4, r5, [sp, #8]\n\t"
"LDRD r6, r7, [sp, #40]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1939,10 +2001,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [sp, #8]\n\t"
"STRD r6, r7, [sp, #40]\n\t"
"LDRD r4, r5, [sp, #16]\n\t"
"LDRD r6, r7, [sp, #48]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1951,10 +2013,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [sp, #16]\n\t"
"STRD r6, r7, [sp, #48]\n\t"
"LDRD r4, r5, [sp, #24]\n\t"
"LDRD r6, r7, [sp, #56]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1963,8 +2025,8 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [sp, #24]\n\t"
"STRD r6, r7, [sp, #56]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDR %[n], [sp, #184]\n\t"
"STR %[n], [sp, #172]\n\t"
"MOV r3, sp\n\t"
@ -2193,7 +2255,7 @@ int curve25519(byte* r, const byte* n, const byte* a)
"BL fe_mul_op\n\t"
"MOV r0, #0x0\n\t"
"ADD sp, sp, #0xbc\n\t"
: [r] "+l" (r), [n] "+l" (n), [a] "+l" (a)
: [r] "+r" (r), [n] "+r" (n), [a] "+r" (a)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr"
);
@ -2201,8 +2263,12 @@ int curve25519(byte* r, const byte* n, const byte* a)
}
#else
int curve25519(byte* r, const byte* n, const byte* a)
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
{
register byte* r asm ("r0") = (byte*)r_p;
register const byte* n asm ("r1") = (const byte*)n_p;
register const byte* a asm ("r2") = (const byte*)a_p;
__asm__ __volatile__ (
"SUB sp, sp, #0xc0\n\t"
"STR %[r], [sp, #176]\n\t"
@ -2218,26 +2284,31 @@ int curve25519(byte* r, const byte* n, const byte* a)
/* Set one */
"MOV r10, #0x1\n\t"
"MOV r11, #0x0\n\t"
"STRD r10, r11, [%[r]]\n\t"
"STM %[r]!, {r10, r11}\n\t"
"MOV r10, #0x0\n\t"
"STRD r10, r11, [%[r], #8]\n\t"
"STRD r10, r11, [%[r], #16]\n\t"
"STRD r10, r11, [%[r], #24]\n\t"
"STM %[r]!, {r10, r11}\n\t"
"STM %[r]!, {r10, r11}\n\t"
"STM %[r]!, {r10, r11}\n\t"
"SUB %[r], %[r], #0x20\n\t"
"MOV r3, sp\n\t"
/* Set zero */
"MOV r10, #0x0\n\t"
"MOV r11, #0x0\n\t"
"STRD r10, r11, [sp]\n\t"
"STRD r10, r11, [sp, #8]\n\t"
"STRD r10, r11, [sp, #16]\n\t"
"STRD r10, r11, [sp, #24]\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"SUB r3, r3, #0x20\n\t"
"ADD r3, sp, #0x20\n\t"
/* Set one */
"MOV r10, #0x1\n\t"
"MOV r11, #0x0\n\t"
"STRD r10, r11, [sp, #32]\n\t"
"STM r3!, {r10, r11}\n\t"
"MOV r10, #0x0\n\t"
"STRD r10, r11, [sp, #40]\n\t"
"STRD r10, r11, [sp, #48]\n\t"
"STRD r10, r11, [sp, #56]\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"SUB r3, r3, #0x20\n\t"
"ADD r3, sp, #0x40\n\t"
/* Copy */
"LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
@ -2508,7 +2579,7 @@ int curve25519(byte* r, const byte* n, const byte* a)
"STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
"MOV r0, #0x0\n\t"
"ADD sp, sp, #0xc0\n\t"
: [r] "+l" (r), [n] "+l" (n), [a] "+l" (a)
: [r] "+r" (r), [n] "+r" (n), [a] "+r" (a)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr"
);
@ -2516,9 +2587,13 @@ int curve25519(byte* r, const byte* n, const byte* a)
}
#endif /* WC_NO_CACHE_RESISTANT */
#endif /* HAVE_CURVE25519 */
#ifdef HAVE_ED25519
void fe_invert(fe r, const fe a)
void fe_invert(fe r_p, const fe a_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x88\n\t"
/* Invert */
@ -2678,14 +2753,17 @@ void fe_invert(fe r, const fe a)
"LDR %[a], [sp, #132]\n\t"
"LDR %[r], [sp, #128]\n\t"
"ADD sp, sp, #0x88\n\t"
: [r] "+l" (r), [a] "+l" (a)
: [r] "+r" (r), [a] "+r" (a)
:
: "memory", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
);
}
void fe_sq2(fe r, const fe a)
void fe_sq2(fe r_p, const fe a_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x24\n\t"
"STRD r0, r1, [sp, #28]\n\t"
@ -2831,14 +2909,17 @@ void fe_sq2(fe r, const fe a)
"STM r12, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
"MOV r0, r12\n\t"
"MOV r1, lr\n\t"
: [r] "+l" (r), [a] "+l" (a)
: [r] "+r" (r), [a] "+r" (a)
:
: "memory", "lr"
);
}
void fe_pow22523(fe r, const fe a)
void fe_pow22523(fe r_p, const fe a_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x68\n\t"
/* pow22523 */
@ -2998,14 +3079,17 @@ void fe_pow22523(fe r, const fe a)
"LDR %[a], [sp, #100]\n\t"
"LDR %[r], [sp, #96]\n\t"
"ADD sp, sp, #0x68\n\t"
: [r] "+l" (r), [a] "+l" (a)
: [r] "+r" (r), [a] "+r" (a)
:
: "memory", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
);
}
void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p)
void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p)
{
register ge_p2 * r asm ("r0") = (ge_p2 *)r_p;
register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x8\n\t"
"STR %[r], [sp]\n\t"
@ -3025,14 +3109,17 @@ void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p)
"ADD r0, r0, #0x40\n\t"
"BL fe_mul_op\n\t"
"ADD sp, sp, #0x8\n\t"
: [r] "+l" (r), [p] "+l" (p)
: [r] "+r" (r), [p] "+r" (p)
:
: "memory", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
);
}
void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p)
void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p)
{
register ge_p3 * r asm ("r0") = (ge_p3 *)r_p;
register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x8\n\t"
"STR %[r], [sp]\n\t"
@ -3057,14 +3144,17 @@ void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p)
"ADD r0, r0, #0x60\n\t"
"BL fe_mul_op\n\t"
"ADD sp, sp, #0x8\n\t"
: [r] "+l" (r), [p] "+l" (p)
: [r] "+r" (r), [p] "+r" (p)
:
: "memory", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
);
}
void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p)
void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p)
{
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p2 * p asm ("r1") = (const ge_p2 *)p_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x8\n\t"
"STR %[r], [sp]\n\t"
@ -3101,14 +3191,18 @@ void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p)
"MOV r1, r0\n\t"
"BL fe_sub_op\n\t"
"ADD sp, sp, #0x8\n\t"
: [r] "+l" (r), [p] "+l" (p)
: [r] "+r" (r), [p] "+r" (p)
:
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
{
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p;
__asm__ __volatile__ (
"SUB sp, sp, #0xc\n\t"
"STR %[r], [sp]\n\t"
@ -3179,14 +3273,18 @@ void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
"ADD r1, r0, #0x20\n\t"
"BL fe_add_sub_op\n\t"
"ADD sp, sp, #0xc\n\t"
: [r] "+l" (r), [p] "+l" (p), [q] "+l" (q)
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
{
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p;
__asm__ __volatile__ (
"SUB sp, sp, #0xc\n\t"
"STR %[r], [sp]\n\t"
@ -3258,14 +3356,18 @@ void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
"ADD r0, r0, #0x20\n\t"
"BL fe_add_sub_op\n\t"
"ADD sp, sp, #0xc\n\t"
: [r] "+l" (r), [p] "+l" (p), [q] "+l" (q)
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
{
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_cached* q asm ("r2") = (const ge_cached*)q_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x2c\n\t"
"STR %[r], [sp]\n\t"
@ -3337,14 +3439,18 @@ void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
"ADD r0, r0, #0x20\n\t"
"BL fe_add_sub_op\n\t"
"ADD sp, sp, #0x2c\n\t"
: [r] "+l" (r), [p] "+l" (p), [q] "+l" (q)
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
{
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_cached* q asm ("r2") = (const ge_cached*)q_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x2c\n\t"
"STR %[r], [sp]\n\t"
@ -3416,14 +3522,16 @@ void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
"ADD r0, r0, #0x40\n\t"
"BL fe_add_sub_op\n\t"
"ADD sp, sp, #0x2c\n\t"
: [r] "+l" (r), [p] "+l" (p), [q] "+l" (q)
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void sc_reduce(byte* s)
void sc_reduce(byte* s_p)
{
register byte* s asm ("r0") = (byte*)s_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x34\n\t"
/* Load bits 252-511 */
@ -3694,14 +3802,19 @@ void sc_reduce(byte* s)
/* Store result */
"STM %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
"ADD sp, sp, #0x34\n\t"
: [s] "+l" (s)
: [s] "+r" (s)
:
: "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
{
register byte* s asm ("r0") = (byte*)s_p;
register const byte* a asm ("r1") = (const byte*)a_p;
register const byte* b asm ("r2") = (const byte*)b_p;
register const byte* c asm ("r3") = (const byte*)c_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x50\n\t"
"ADD lr, sp, #0x44\n\t"
@ -4096,7 +4209,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
/* Store result */
"STM %[s], {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t"
"ADD sp, sp, #0x50\n\t"
: [s] "+l" (s), [a] "+l" (a), [b] "+l" (b), [c] "+l" (c)
: [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);

View File

@ -42,75 +42,32 @@
#ifdef WOLFSSL_ARMASM_NO_NEON
static const uint32_t L_SHA256_transform_len_k[] = {
0x428a2f98,
0x71374491,
0xb5c0fbcf,
0xe9b5dba5,
0x3956c25b,
0x59f111f1,
0x923f82a4,
0xab1c5ed5,
0xd807aa98,
0x12835b01,
0x243185be,
0x550c7dc3,
0x72be5d74,
0x80deb1fe,
0x9bdc06a7,
0xc19bf174,
0xe49b69c1,
0xefbe4786,
0xfc19dc6,
0x240ca1cc,
0x2de92c6f,
0x4a7484aa,
0x5cb0a9dc,
0x76f988da,
0x983e5152,
0xa831c66d,
0xb00327c8,
0xbf597fc7,
0xc6e00bf3,
0xd5a79147,
0x6ca6351,
0x14292967,
0x27b70a85,
0x2e1b2138,
0x4d2c6dfc,
0x53380d13,
0x650a7354,
0x766a0abb,
0x81c2c92e,
0x92722c85,
0xa2bfe8a1,
0xa81a664b,
0xc24b8b70,
0xc76c51a3,
0xd192e819,
0xd6990624,
0xf40e3585,
0x106aa070,
0x19a4c116,
0x1e376c08,
0x2748774c,
0x34b0bcb5,
0x391c0cb3,
0x4ed8aa4a,
0x5b9cca4f,
0x682e6ff3,
0x748f82ee,
0x78a5636f,
0x84c87814,
0x8cc70208,
0x90befffa,
0xa4506ceb,
0xbef9a3f7,
0xc67178f2,
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
};
void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len);
void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
{
register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p;
register const byte* data asm ("r1") = (const byte*)data_p;
register word32 len asm ("r2") = (word32)len_p;
register uint32_t* L_SHA256_transform_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_len_k;
__asm__ __volatile__ (
"SUB sp, sp, #0xc0\n\t"
"MOV r3, %[L_SHA256_transform_len_k]\n\t"
@ -1463,9 +1420,9 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
"ADD %[data], %[data], #0x40\n\t"
"BNE L_SHA256_transform_len_begin_%=\n\t"
"ADD sp, sp, #0xc0\n\t"
: [sha256] "+l" (sha256), [data] "+l" (data), [len] "+l" (len)
: [L_SHA256_transform_len_k] "r" (L_SHA256_transform_len_k)
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
);
}

View File

@ -42,91 +42,56 @@
#ifdef WOLFSSL_ARMASM_NO_NEON
static const uint64_t L_SHA512_transform_len_k[] = {
0x428a2f98d728ae22UL,
0x7137449123ef65cdUL,
0xb5c0fbcfec4d3b2fUL,
0xe9b5dba58189dbbcUL,
0x3956c25bf348b538UL,
0x59f111f1b605d019UL,
0x923f82a4af194f9bUL,
0xab1c5ed5da6d8118UL,
0xd807aa98a3030242UL,
0x12835b0145706fbeUL,
0x243185be4ee4b28cUL,
0x550c7dc3d5ffb4e2UL,
0x72be5d74f27b896fUL,
0x80deb1fe3b1696b1UL,
0x9bdc06a725c71235UL,
0xc19bf174cf692694UL,
0xe49b69c19ef14ad2UL,
0xefbe4786384f25e3UL,
0xfc19dc68b8cd5b5UL,
0x240ca1cc77ac9c65UL,
0x2de92c6f592b0275UL,
0x4a7484aa6ea6e483UL,
0x5cb0a9dcbd41fbd4UL,
0x76f988da831153b5UL,
0x983e5152ee66dfabUL,
0xa831c66d2db43210UL,
0xb00327c898fb213fUL,
0xbf597fc7beef0ee4UL,
0xc6e00bf33da88fc2UL,
0xd5a79147930aa725UL,
0x6ca6351e003826fUL,
0x142929670a0e6e70UL,
0x27b70a8546d22ffcUL,
0x2e1b21385c26c926UL,
0x4d2c6dfc5ac42aedUL,
0x53380d139d95b3dfUL,
0x650a73548baf63deUL,
0x766a0abb3c77b2a8UL,
0x81c2c92e47edaee6UL,
0x92722c851482353bUL,
0xa2bfe8a14cf10364UL,
0xa81a664bbc423001UL,
0xc24b8b70d0f89791UL,
0xc76c51a30654be30UL,
0xd192e819d6ef5218UL,
0xd69906245565a910UL,
0xf40e35855771202aUL,
0x106aa07032bbd1b8UL,
0x19a4c116b8d2d0c8UL,
0x1e376c085141ab53UL,
0x2748774cdf8eeb99UL,
0x34b0bcb5e19b48a8UL,
0x391c0cb3c5c95a63UL,
0x4ed8aa4ae3418acbUL,
0x5b9cca4f7763e373UL,
0x682e6ff3d6b2b8a3UL,
0x748f82ee5defb2fcUL,
0x78a5636f43172f60UL,
0x84c87814a1f0ab72UL,
0x8cc702081a6439ecUL,
0x90befffa23631e28UL,
0xa4506cebde82bde9UL,
0xbef9a3f7b2c67915UL,
0xc67178f2e372532bUL,
0xca273eceea26619cUL,
0xd186b8c721c0c207UL,
0xeada7dd6cde0eb1eUL,
0xf57d4f7fee6ed178UL,
0x6f067aa72176fbaUL,
0xa637dc5a2c898a6UL,
0x113f9804bef90daeUL,
0x1b710b35131c471bUL,
0x28db77f523047d84UL,
0x32caab7b40c72493UL,
0x3c9ebe0a15c9bebcUL,
0x431d67c49c100d4cUL,
0x4cc5d4becb3e42b6UL,
0x597f299cfc657e2aUL,
0x5fcb6fab3ad6faecUL,
0x6c44198c4a475817UL,
0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
0xd192e819d6ef5218UL, 0xd69906245565a910UL,
0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
0x28db77f523047d84UL, 0x32caab7b40c72493UL,
0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
};
void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len);
void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
{
register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p;
register const byte* data asm ("r1") = (const byte*)data_p;
register word32 len asm ("r2") = (word32)len_p;
register uint64_t* L_SHA512_transform_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_len_k;
__asm__ __volatile__ (
"SUB sp, sp, #0xc0\n\t"
"MOV r3, %[L_SHA512_transform_len_k]\n\t"
@ -3578,9 +3543,9 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"BNE L_SHA512_transform_len_begin_%=\n\t"
"EOR r0, r0, r0\n\t"
"ADD sp, sp, #0xc0\n\t"
: [sha512] "+l" (sha512), [data] "+l" (data), [len] "+l" (len)
: [L_SHA512_transform_len_k] "r" (L_SHA512_transform_len_k)
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
);
}

View File

@ -56,7 +56,7 @@ typedef struct Gcm {
} Gcm;
WOLFSSL_LOCAL void GenerateM0(Gcm* gcm);
#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
#ifdef WOLFSSL_ARMASM
WOLFSSL_LOCAL void GMULT(byte* X, byte* Y);
#endif
WOLFSSL_LOCAL void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,

View File

@ -115,7 +115,7 @@ typedef struct {
void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p);
void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p);
void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p);
#define ge_p3_dbl(r, p) ge_p2_dbl((ge_p1p1 *)r, (ge_p2 *)p)
#define ge_p3_dbl(r, p) ge_p2_dbl((ge_p1p1 *)(r), (ge_p2 *)(p))
void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);