From cfab666369d52f8b342f5ff99fe34d1bcf48b22c Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Wed, 26 Mar 2025 12:46:32 +1000 Subject: [PATCH 1/3] ARM32/Thumb2 ASM: fix WOLFSSL_NO_VAR_ASSIGN_REG Thumb2 needed constants defined even with no register assignments. ARM32 needed support added for not having registers assigned to variables. --- wolfcrypt/src/port/arm/armv8-32-aes-asm.S | 4 +- wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c | 157 ++++++++++- .../src/port/arm/armv8-32-chacha-asm_c.c | 33 +++ .../src/port/arm/armv8-32-curve25519_c.c | 250 ++++++++++++++++++ wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c | 70 +++++ .../src/port/arm/armv8-32-poly1305-asm_c.c | 56 ++++ .../src/port/arm/armv8-32-sha256-asm_c.c | 25 ++ wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c | 24 ++ .../src/port/arm/armv8-32-sha512-asm_c.c | 26 ++ wolfcrypt/src/port/arm/thumb2-aes-asm.S | 4 +- wolfcrypt/src/port/arm/thumb2-aes-asm_c.c | 77 ++++-- wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c | 4 + wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c | 24 ++ .../src/port/arm/thumb2-poly1305-asm_c.c | 4 + wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c | 4 + wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c | 3 + wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c | 4 + 17 files changed, 741 insertions(+), 28 deletions(-) diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S index d442c9c86..745e7f609 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S @@ -3850,13 +3850,13 @@ L_AES_ECB_decrypt_end: .type AES_CBC_decrypt, %function AES_CBC_decrypt: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} - ldr r8, [sp, #36] - ldr r4, [sp, #40] mov lr, r0 adr r0, L_AES_ARM32_td_ecb ldr r0, [r0] mov r12, r2 adr r2, L_AES_ARM32_td4 + ldr r8, [sp, #36] + ldr r4, [sp, #40] push {r3, r4} cmp r8, #10 beq L_AES_CBC_decrypt_loop_block_128 diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c index 22938bbd5..22d9397fd 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c @@ -44,11 +44,15 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ +#ifdef __ghs__ +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __ghs__ */ #ifndef NO_AES #include @@ -204,12 +208,23 @@ static const word32* L_AES_ARM32_te = L_AES_ARM32_te_data; * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT void AES_invert_key(unsigned char* ks_p, word32 rounds_p); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_invert_key(unsigned char* ks_p, word32 rounds_p) +#else +void AES_invert_key(unsigned char* ks, word32 rounds) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register unsigned char* ks asm ("r0") = (unsigned char*)ks_p; register word32 rounds asm ("r1") = (word32)rounds_p; register word32* L_AES_ARM32_te_c asm ("r2") = (word32*)L_AES_ARM32_te; register word32* L_AES_ARM32_td_c asm ("r3") = (word32*)L_AES_ARM32_td; +#else + register word32* L_AES_ARM32_te_c = (word32*)L_AES_ARM32_te; + + register word32* L_AES_ARM32_td_c = (word32*)L_AES_ARM32_td; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, %[L_AES_ARM32_te]\n\t" @@ -423,15 +438,27 @@ static const word32 L_AES_ARM32_rcon[] = { void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned
char* ks_p); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char* ks_p) +#else +void AES_set_encrypt_key(const unsigned char* key, word32 len, + unsigned char* ks) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register const unsigned char* key asm ("r0") = (const unsigned char*)key_p; register word32 len asm ("r1") = (word32)len_p; register unsigned char* ks asm ("r2") = (unsigned char*)ks_p; register word32* L_AES_ARM32_te_c asm ("r3") = (word32*)L_AES_ARM32_te; register word32* L_AES_ARM32_rcon_c asm ("r4") = (word32*)&L_AES_ARM32_rcon; +#else + register word32* L_AES_ARM32_te_c = (word32*)L_AES_ARM32_te; + + register word32* L_AES_ARM32_rcon_c = (word32*)&L_AES_ARM32_rcon; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r8, %[L_AES_ARM32_te]\n\t" @@ -939,13 +966,19 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, void AES_encrypt_block(const word32* te_p, int nr_p, int len_p, const word32* ks_p); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_encrypt_block(const word32* te_p, int nr_p, int len_p, const word32* ks_p) +#else +void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register const word32* te asm ("r0") = (const word32*)te_p; register int nr asm ("r1") = (int)nr_p; register int len asm ("r2") = (int)len_p; register const word32* ks asm ("r3") = (const word32*)ks_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n" @@ -1595,9 +1628,15 @@ void AES_encrypt_block(const word32* te_p, int nr_p, int len_p, static const word32* L_AES_ARM32_te_ecb = L_AES_ARM32_te_data; void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) +#else +void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; register unsigned char* out asm ("r1") = (unsigned char*)out_p; register unsigned long len asm ("r2") = (unsigned long)len_p; @@ -1605,11 +1644,19 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, register int nr asm ("r4") = (int)nr_p; register word32* L_AES_ARM32_te_ecb_c asm ("r5") = (word32*)L_AES_ARM32_te_ecb; +#else + register word32* L_AES_ARM32_te_ecb_c = (word32*)L_AES_ARM32_te_ecb; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov lr, %[in]\n\t" "mov r0, %[L_AES_ARM32_te_ecb]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "mov r12, r4\n\t" +#else + "mov r12, %[nr]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "push {%[ks]}\n\t" "cmp r12, #10\n\t" "beq L_AES_ECB_encrypt_start_block_128_%=\n\t" @@ -1851,10 +1898,16 @@ static const word32* L_AES_ARM32_te_cbc = L_AES_ARM32_te_data; void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) +#else +void AES_CBC_encrypt(const unsigned char* in, 
unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; register unsigned char* out asm ("r1") = (unsigned char*)out_p; register unsigned long len asm ("r2") = (unsigned long)len_p; @@ -1863,10 +1916,22 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, register unsigned char* iv asm ("r5") = (unsigned char*)iv_p; register word32* L_AES_ARM32_te_cbc_c asm ("r6") = (word32*)L_AES_ARM32_te_cbc; +#else + register word32* L_AES_ARM32_te_cbc_c = (word32*)L_AES_ARM32_te_cbc; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "mov r8, r4\n\t" +#else + "mov r8, %[nr]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "mov r9, r5\n\t" +#else + "mov r9, %[iv]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "mov lr, %[in]\n\t" "mov r0, %[L_AES_ARM32_te_cbc]\n\t" "ldm r9, {r4, r5, r6, r7}\n\t" @@ -2124,10 +2189,16 @@ static const word32* L_AES_ARM32_te_ctr = L_AES_ARM32_te_data; void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) +#else +void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; register unsigned char* out asm ("r1") = (unsigned char*)out_p; register unsigned long len asm ("r2") = (unsigned long)len_p; @@ -2136,10 +2207,22 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p; register word32* L_AES_ARM32_te_ctr_c asm ("r6") = (word32*)L_AES_ARM32_te_ctr; +#else + register word32* L_AES_ARM32_te_ctr_c = (word32*)L_AES_ARM32_te_ctr; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "mov r12, r4\n\t" +#else + "mov r12, %[nr]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "mov r8, r5\n\t" +#else + "mov r8, %[ctr]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "mov lr, %[in]\n\t" "mov r0, %[L_AES_ARM32_te_ctr]\n\t" "ldm r8, {r4, r5, r6, r7}\n\t" @@ -2398,11 +2481,17 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ defined(HAVE_AES_CBC) void AES_decrypt_block(const word32* td_p, int nr_p, const byte* td4_p); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_decrypt_block(const word32* td_p, int nr_p, const byte* td4_p) +#else +void AES_decrypt_block(const word32* td, int nr, const byte* td4) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register const word32* td asm ("r0") = (const word32*)td_p; register int nr asm ("r1") = (int)nr_p; register const byte* td4 asm ("r2") = (const byte*)td4_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n" @@ -3086,9 +3175,15 @@ static const byte L_AES_ARM32_td4[] = { #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) void AES_ECB_decrypt(const unsigned 
char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) +#else +void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; register unsigned char* out asm ("r1") = (unsigned char*)out_p; register unsigned long len asm ("r2") = (unsigned long)len_p; @@ -3097,9 +3192,19 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, register word32* L_AES_ARM32_td_ecb_c asm ("r5") = (word32*)L_AES_ARM32_td_ecb; register byte* L_AES_ARM32_td4_c asm ("r6") = (byte*)&L_AES_ARM32_td4; +#else + register word32* L_AES_ARM32_td_ecb_c = (word32*)L_AES_ARM32_td_ecb; + + register byte* L_AES_ARM32_td4_c = (byte*)&L_AES_ARM32_td4; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "mov r8, r4\n\t" +#else + "mov r8, %[nr]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "mov lr, %[in]\n\t" "mov r0, %[L_AES_ARM32_td_ecb]\n\t" "mov r12, %[len]\n\t" @@ -3339,10 +3444,16 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) +#else +void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; register unsigned char* out asm ("r1") = (unsigned char*)out_p; register unsigned long len asm ("r2") = (unsigned long)len_p; @@ -3352,14 +3463,28 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, register word32* L_AES_ARM32_td_ecb_c asm ("r6") = (word32*)L_AES_ARM32_td_ecb; register byte* L_AES_ARM32_td4_c asm ("r7") = (byte*)&L_AES_ARM32_td4; +#else + register word32* L_AES_ARM32_td_ecb_c = (word32*)L_AES_ARM32_td_ecb; + + register byte* L_AES_ARM32_td4_c = (byte*)&L_AES_ARM32_td4; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "mov r8, r4\n\t" - "mov r4, r5\n\t" "mov lr, %[in]\n\t" "mov r0, %[L_AES_ARM32_td_ecb]\n\t" "mov r12, %[len]\n\t" "mov r2, %[L_AES_ARM32_td4]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + "mov r8, r4\n\t" +#else + "mov r8, %[nr]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + "mov r4, r5\n\t" +#else + "mov r4, %[iv]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "push {%[ks]-r4}\n\t" "cmp r8, #10\n\t" "beq L_AES_CBC_decrypt_loop_block_128_%=\n\t" @@ -3983,9 +4108,15 @@ static const word32 L_GCM_gmult_len_r[] = { void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned char* data_p, unsigned long len_p); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned char* data_p, unsigned long len_p) +#else +void GCM_gmult_len(unsigned char* x, const unsigned char** m, + const unsigned char* data, unsigned long len) 
+#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register unsigned char* x asm ("r0") = (unsigned char*)x_p; register const unsigned char** m asm ("r1") = (const unsigned char**)m_p; register const unsigned char* data asm ("r2") = @@ -3993,6 +4124,10 @@ void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, register unsigned long len asm ("r3") = (unsigned long)len_p; register word32* L_GCM_gmult_len_r_c asm ("r4") = (word32*)&L_GCM_gmult_len_r; +#else + register word32* L_GCM_gmult_len_r_c = (word32*)&L_GCM_gmult_len_r; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov lr, %[L_GCM_gmult_len_r]\n\t" @@ -4578,10 +4713,16 @@ static const word32* L_AES_ARM32_te_gcm = L_AES_ARM32_te_data; void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) +#else +void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; register unsigned char* out asm ("r1") = (unsigned char*)out_p; register unsigned long len asm ("r2") = (unsigned long)len_p; @@ -4590,10 +4731,22 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p; register word32* L_AES_ARM32_te_gcm_c asm ("r6") = (word32*)L_AES_ARM32_te_gcm; +#else + register word32* L_AES_ARM32_te_gcm_c = (word32*)L_AES_ARM32_te_gcm; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "mov r12, r4\n\t" +#else + "mov r12, %[nr]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "mov r8, r5\n\t" +#else + "mov r8, %[ctr]\n\t" +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "mov lr, %[in]\n\t" "mov r0, %[L_AES_ARM32_te_gcm]\n\t" "ldm r8, {r4, r5, r6, r7}\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c index 01281af89..10568dba1 100644 --- a/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c @@ -44,19 +44,29 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ +#ifdef __ghs__ +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __ghs__ */ #ifdef HAVE_CHACHA #include +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void wc_chacha_setiv(word32* x_p, const byte* iv_p, word32 counter_p) +#else +void wc_chacha_setiv(word32* x, const byte* iv, word32 counter) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register word32* x asm ("r0") = (word32*)x_p; register const byte* iv asm ("r1") = (const byte*)iv_p; register word32 counter asm ("r2") = (word32)counter_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "add r3, %[x], #52\n\t" @@ -81,15 +91,26 @@ static const word32 L_chacha_arm32_constants[] = { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, }; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void wc_chacha_setkey(word32* x_p, const byte* key_p, word32 
keySz_p) +#else +void wc_chacha_setkey(word32* x, const byte* key, word32 keySz) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register word32* x asm ("r0") = (word32*)x_p; register const byte* key asm ("r1") = (const byte*)key_p; register word32 keySz asm ("r2") = (word32)keySz_p; register word32* L_chacha_arm32_constants_c asm ("r3") = (word32*)&L_chacha_arm32_constants; +#else + register word32* L_chacha_arm32_constants_c = + (word32*)&L_chacha_arm32_constants; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( + "mov r3, %[L_chacha_arm32_constants]\n\t" "subs %[keySz], %[keySz], #16\n\t" "add r3, r3, %[keySz]\n\t" /* Start state with constants */ @@ -126,13 +147,19 @@ void wc_chacha_setkey(word32* x_p, const byte* key_p, word32 keySz_p) } #ifdef WOLFSSL_ARMASM_NO_NEON +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void wc_chacha_crypt_bytes(ChaCha* ctx_p, byte* c_p, const byte* m_p, word32 len_p) +#else +void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register ChaCha* ctx asm ("r0") = (ChaCha*)ctx_p; register byte* c asm ("r1") = (byte*)c_p; register const byte* m asm ("r2") = (const byte*)m_p; register word32 len asm ("r3") = (word32)len_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #52\n\t" @@ -490,13 +517,19 @@ void wc_chacha_crypt_bytes(ChaCha* ctx_p, byte* c_p, const byte* m_p, ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void wc_chacha_use_over(byte* over_p, byte* output_p, const byte* input_p, word32 len_p) +#else +void wc_chacha_use_over(byte* over, byte* output, const byte* input, word32 len) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register byte* over asm ("r0") = (byte*)over_p; register byte* output asm ("r1") = (byte*)output_p; register const byte* input asm ("r2") = (const byte*)input_p; register word32 len asm ("r3") = (word32)len_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n" diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c index 40462097e..363dd12bf 100644 --- a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c @@ -44,11 +44,15 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ +#ifdef __ghs__ +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __ghs__ */ /* Based on work by: Emil Lenngren * https://github.com/pornin/X25519-Cortex-M4 */ @@ -60,8 +64,14 @@ #if defined(HAVE_CURVE25519) || defined(HAVE_ED25519) #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_init() +#else +void fe_init() +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n\t" : @@ -71,8 +81,14 @@ void fe_init() } void fe_add_sub_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_add_sub_op() +#else +void fe_add_sub_op() +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Add-Sub */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) @@ -279,8 +295,14 @@ void fe_add_sub_op() } void fe_sub_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sub_op() +#else 
+void fe_sub_op() +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Sub */ "ldm r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" @@ -320,11 +342,17 @@ void fe_sub_op() ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sub(fe r_p, const fe a_p, const fe b_p) +#else +void fe_sub(fe r, const fe a, const fe b) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* r asm ("r0") = (sword32*)r_p; register const sword32* a asm ("r1") = (const sword32*)a_p; register const sword32* b asm ("r2") = (const sword32*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "bl fe_sub_op\n\t" @@ -336,8 +364,14 @@ void fe_sub(fe r_p, const fe a_p, const fe b_p) } void fe_add_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_add_op() +#else +void fe_add_op() +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Add */ "ldm r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" @@ -378,11 +412,17 @@ void fe_add_op() ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_add(fe r_p, const fe a_p, const fe b_p) +#else +void fe_add(fe r, const fe a, const fe b) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* r asm ("r0") = (sword32*)r_p; register const sword32* a asm ("r1") = (const sword32*)a_p; register const sword32* b asm ("r2") = (const sword32*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "bl fe_add_op\n\t" @@ -394,10 +434,16 @@ void fe_add(fe r_p, const fe a_p, const fe b_p) } #ifdef HAVE_ED25519 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_frombytes(fe out_p, const unsigned char* in_p) +#else +void fe_frombytes(fe out, const unsigned char* in) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* out asm ("r0") = (sword32*)out_p; register const unsigned char* in asm ("r1") = (const unsigned char*)in_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "ldr r2, [%[in]]\n\t" @@ -427,10 +473,16 @@ void fe_frombytes(fe out_p, const unsigned char* in_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_tobytes(unsigned char* out_p, const fe n_p) +#else +void fe_tobytes(unsigned char* out, const fe n) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register unsigned char* out asm ("r0") = (unsigned char*)out_p; register const sword32* n asm ("r1") = (const sword32*)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "ldm %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" @@ -471,9 +523,15 @@ void fe_tobytes(unsigned char* out_p, const fe n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_1(fe n_p) +#else +void fe_1(fe n) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* n asm ("r0") = (sword32*)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Set one */ @@ -492,9 +550,15 @@ void fe_1(fe n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_0(fe n_p) +#else +void fe_0(fe n) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* n asm ("r0") = (sword32*)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Set zero */ @@ -513,10 +577,16 @@ void fe_0(fe n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_copy(fe r_p, const fe a_p) +#else +void fe_copy(fe r, const fe a) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { 
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* r asm ("r0") = (sword32*)r_p; register const sword32* a asm ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Copy */ @@ -572,10 +642,16 @@ void fe_copy(fe r_p, const fe a_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_neg(fe r_p, const fe a_p) +#else +void fe_neg(fe r, const fe a) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* r asm ("r0") = (sword32*)r_p; register const sword32* a asm ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mvn lr, #0\n\t" @@ -599,9 +675,15 @@ void fe_neg(fe r_p, const fe a_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int fe_isnonzero(const fe a_p) +#else +int fe_isnonzero(const fe a) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register const sword32* a asm ("r0") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "ldm %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" @@ -643,9 +725,15 @@ int fe_isnonzero(const fe a_p) return (word32)(size_t)a; } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int fe_isnegative(const fe a_p) +#else +int fe_isnegative(const fe a) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register const sword32* a asm ("r0") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "ldm %[a]!, {r2, r3, r4, r5}\n\t" @@ -671,11 +759,17 @@ int fe_isnegative(const fe a_p) #if defined(HAVE_ED25519_MAKE_KEY) || defined(HAVE_ED25519_SIGN) #ifndef WC_NO_CACHE_RESISTANT +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) +#else +void fe_cmov_table(fe* r, fe* base, signed char b) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register fe* r asm ("r0") = (fe*)r_p; register fe* base asm ("r1") = (fe*)base_p; register signed char b asm ("r2") = (signed char)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) @@ -2205,11 +2299,17 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) +#else +void fe_cmov_table(fe* r, fe* base, signed char b) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register fe* r asm ("r0") = (fe*)r_p; register fe* base asm ("r1") = (fe*)base_p; register signed char b asm ("r2") = (signed char)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) @@ -2330,8 +2430,14 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) #endif /* HAVE_ED25519 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) void fe_mul_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul_op() +#else +void fe_mul_op() +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #40\n\t" "str r0, [sp, #36]\n\t" @@ -2714,8 +2820,14 @@ void fe_mul_op() #else void fe_mul_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul_op() +#else +void fe_mul_op() +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #44\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) @@ -2856,11 +2968,17 @@ void fe_mul_op() } #endif 
/* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul(fe r_p, const fe a_p, const fe b_p) +#else +void fe_mul(fe r, const fe a, const fe b) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* r asm ("r0") = (sword32*)r_p; register const sword32* a asm ("r1") = (const sword32*)a_p; register const sword32* b asm ("r2") = (const sword32*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "bl fe_mul_op\n\t" @@ -2873,8 +2991,14 @@ void fe_mul(fe r_p, const fe a_p, const fe b_p) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) void fe_sq_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq_op() +#else +void fe_sq_op() +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #0x44\n\t" "str r0, [sp, #64]\n\t" @@ -3150,8 +3274,14 @@ void fe_sq_op() #else void fe_sq_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq_op() +#else +void fe_sq_op() +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #32\n\t" "str r0, [sp, #28]\n\t" @@ -3278,10 +3408,16 @@ void fe_sq_op() } #endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq(fe r_p, const fe a_p) +#else +void fe_sq(fe r, const fe a) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* r asm ("r0") = (sword32*)r_p; register const sword32* a asm ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "bl fe_sq_op\n\t" @@ -3294,10 +3430,16 @@ void fe_sq(fe r_p, const fe a_p) #ifdef HAVE_CURVE25519 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul121666(fe r_p, fe a_p) +#else +void fe_mul121666(fe r, fe a) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* r asm ("r0") = (sword32*)r_p; register sword32* a asm ("r1") = (sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Multiply by 121666 */ @@ -3367,10 +3509,16 @@ void fe_mul121666(fe r_p, fe a_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul121666(fe r_p, fe a_p) +#else +void fe_mul121666(fe r, fe a) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* r asm ("r0") = (sword32*)r_p; register sword32* a asm ("r1") = (sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Multiply by 121666 */ @@ -3428,11 +3576,17 @@ void fe_mul121666(fe r_p, fe a_p) #endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ #ifndef WC_NO_CACHE_RESISTANT +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int curve25519(byte* r_p, const byte* n_p, const byte* a_p) +#else +int curve25519(byte* r, const byte* n, const byte* a) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register byte* r asm ("r0") = (byte*)r_p; register const byte* n asm ("r1") = (const byte*)n_p; register const byte* a asm ("r2") = (const byte*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #0xbc\n\t" @@ -3819,11 +3973,17 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int curve25519(byte* r_p, const byte* n_p, const byte* a_p) +#else +int curve25519(byte* r, const byte* n, const byte* a) +#endif /* 
WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register byte* r asm ("r0") = (byte*)r_p; register const byte* n asm ("r1") = (const byte*)n_p; register const byte* a asm ("r2") = (const byte*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #0xc0\n\t" @@ -4135,10 +4295,16 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) #endif /* WC_NO_CACHE_RESISTANT */ #endif /* HAVE_CURVE25519 */ #ifdef HAVE_ED25519 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_invert(fe r_p, const fe a_p) +#else +void fe_invert(fe r, const fe a) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* r asm ("r0") = (sword32*)r_p; register const sword32* a asm ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #0x88\n\t" @@ -4307,10 +4473,16 @@ void fe_invert(fe r_p, const fe a_p) } #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq2(fe r_p, const fe a_p) +#else +void fe_sq2(fe r, const fe a) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* r asm ("r0") = (sword32*)r_p; register const sword32* a asm ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #0x44\n\t" @@ -4627,10 +4799,16 @@ void fe_sq2(fe r_p, const fe a_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq2(fe r_p, const fe a_p) +#else +void fe_sq2(fe r, const fe a) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* r asm ("r0") = (sword32*)r_p; register const sword32* a asm ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #36\n\t" @@ -4806,10 +4984,16 @@ void fe_sq2(fe r_p, const fe a_p) } #endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_pow22523(fe r_p, const fe a_p) +#else +void fe_pow22523(fe r, const fe a) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* r asm ("r0") = (sword32*)r_p; register const sword32* a asm ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #0x68\n\t" @@ -4977,10 +5161,16 @@ void fe_pow22523(fe r_p, const fe a_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p) +#else +void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register ge_p2 * r asm ("r0") = (ge_p2 *)r_p; register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #8\n\t" @@ -5008,10 +5198,16 @@ void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p) +#else +void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register ge_p3 * r asm ("r0") = (ge_p3 *)r_p; register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #8\n\t" @@ -5044,10 +5240,16 @@ void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p) +#else +void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef 
WOLFSSL_NO_VAR_ASSIGN_REG register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; register const ge_p2 * p asm ("r1") = (const ge_p2 *)p_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #8\n\t" @@ -5092,11 +5294,17 @@ void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) +#else +void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #12\n\t" @@ -5179,11 +5387,17 @@ void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) +#else +void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #12\n\t" @@ -5267,11 +5481,17 @@ void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) +#else +void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; register const ge_cached* q asm ("r2") = (const ge_cached*)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #44\n\t" @@ -5355,11 +5575,17 @@ void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) +#else +void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; register const ge_cached* q asm ("r2") = (const ge_cached*)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #44\n\t" @@ -5444,9 +5670,15 @@ void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) } #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_reduce(byte* s_p) +#else +void sc_reduce(byte* s) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register byte* s asm ("r0") = (byte*)s_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #56\n\t" @@ -6233,9 +6465,15 @@ void sc_reduce(byte* s_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_reduce(byte* s_p) +#else +void sc_reduce(byte* s) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register byte* s asm ("r0") = (byte*)s_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #56\n\t" @@ -6895,12 +7133,18 @@ void sc_reduce(byte* s_p) #endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ 
#ifdef HAVE_ED25519_SIGN #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) +#else +void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register byte* s asm ("r0") = (byte*)s_p; register const byte* a asm ("r1") = (const byte*)a_p; register const byte* b asm ("r2") = (const byte*)b_p; register const byte* c asm ("r3") = (const byte*)c_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #0x50\n\t" @@ -8044,12 +8288,18 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) +#else +void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register byte* s asm ("r0") = (byte*)s_p; register const byte* a asm ("r1") = (const byte*)a_p; register const byte* b asm ("r2") = (const byte*)b_p; register const byte* c asm ("r3") = (const byte*)c_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #0x50\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c index b6aa589cc..7a03ed262 100644 --- a/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c @@ -44,11 +44,15 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ +#ifdef __ghs__ +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __ghs__ */ #include #ifdef WOLFSSL_WC_MLKEM @@ -87,14 +91,25 @@ static const word16 L_mlkem_arm32_ntt_zetas[] = { 0x03be, 0x074d, 0x05f2, 0x065c, }; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void mlkem_arm32_ntt(sword16* r_p) +#else +void mlkem_arm32_ntt(sword16* r) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword16* r asm ("r0") = (sword16*)r_p; register word16* L_mlkem_arm32_ntt_zetas_c asm ("r1") = (word16*)&L_mlkem_arm32_ntt_zetas; +#else + register word16* L_mlkem_arm32_ntt_zetas_c = + (word16*)&L_mlkem_arm32_ntt_zetas; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #8\n\t" + "mov r1, %[L_mlkem_arm32_ntt_zetas]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r10, #0x1\n\t" @@ -3123,14 +3138,25 @@ static const word16 L_mlkem_invntt_zetas_inv[] = { 0x05ed, 0x0167, 0x02f6, 0x05a1, }; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void mlkem_arm32_invntt(sword16* r_p) +#else +void mlkem_arm32_invntt(sword16* r) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword16* r asm ("r0") = (sword16*)r_p; register word16* L_mlkem_invntt_zetas_inv_c asm ("r1") = (word16*)&L_mlkem_invntt_zetas_inv; +#else + register word16* L_mlkem_invntt_zetas_inv_c = + (word16*)&L_mlkem_invntt_zetas_inv; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #8\n\t" + "mov r1, %[L_mlkem_invntt_zetas_inv]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r10, #0x1\n\t" @@ -7553,16 +7579,27 @@ static const word16 
L_mlkem_basemul_mont_zetas[] = { 0x03be, 0x074d, 0x05f2, 0x065c, }; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void mlkem_arm32_basemul_mont(sword16* r_p, const sword16* a_p, const sword16* b_p) +#else +void mlkem_arm32_basemul_mont(sword16* r, const sword16* a, const sword16* b) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword16* r asm ("r0") = (sword16*)r_p; register const sword16* a asm ("r1") = (const sword16*)a_p; register const sword16* b asm ("r2") = (const sword16*)b_p; register word16* L_mlkem_basemul_mont_zetas_c asm ("r3") = (word16*)&L_mlkem_basemul_mont_zetas; +#else + register word16* L_mlkem_basemul_mont_zetas_c = + (word16*)&L_mlkem_basemul_mont_zetas; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( + "mov r3, %[L_mlkem_basemul_mont_zetas]\n\t" "add r3, r3, #0x80\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) @@ -7841,16 +7878,28 @@ void mlkem_arm32_basemul_mont(sword16* r_p, const sword16* a_p, ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void mlkem_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, const sword16* b_p) +#else +void mlkem_arm32_basemul_mont_add(sword16* r, const sword16* a, + const sword16* b) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword16* r asm ("r0") = (sword16*)r_p; register const sword16* a asm ("r1") = (const sword16*)a_p; register const sword16* b asm ("r2") = (const sword16*)b_p; register word16* L_mlkem_basemul_mont_zetas_c asm ("r3") = (word16*)&L_mlkem_basemul_mont_zetas; +#else + register word16* L_mlkem_basemul_mont_zetas_c = + (word16*)&L_mlkem_basemul_mont_zetas; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( + "mov r3, %[L_mlkem_basemul_mont_zetas]\n\t" "add r3, r3, #0x80\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) @@ -8163,11 +8212,21 @@ void mlkem_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void mlkem_arm32_csubq(sword16* p_p) +#else +void mlkem_arm32_csubq(sword16* p) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword16* p asm ("r0") = (sword16*)p_p; register word16* L_mlkem_basemul_mont_zetas_c asm ("r1") = (word16*)&L_mlkem_basemul_mont_zetas; +#else + register word16* L_mlkem_basemul_mont_zetas_c = + (word16*)&L_mlkem_basemul_mont_zetas; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) @@ -8342,15 +8401,26 @@ void mlkem_arm32_csubq(sword16* p_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG unsigned int mlkem_arm32_rej_uniform(sword16* p_p, unsigned int len_p, const byte* r_p, unsigned int rLen_p) +#else +unsigned int mlkem_arm32_rej_uniform(sword16* p, unsigned int len, + const byte* r, unsigned int rLen) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword16* p asm ("r0") = (sword16*)p_p; register unsigned int len asm ("r1") = (unsigned int)len_p; register const byte* r asm ("r2") = (const byte*)r_p; register unsigned int rLen asm ("r3") = (unsigned int)rLen_p; register word16* L_mlkem_basemul_mont_zetas_c asm ("r4") = (word16*)&L_mlkem_basemul_mont_zetas; +#else + register word16* L_mlkem_basemul_mont_zetas_c = + (word16*)&L_mlkem_basemul_mont_zetas; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) diff 
--git a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c index 950a03676..15dccec1f 100644 --- a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c @@ -44,22 +44,33 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ +#ifdef __ghs__ +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __ghs__ */ #ifdef HAVE_POLY1305 #include #ifdef WOLFSSL_ARMASM_NO_NEON +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void poly1305_arm32_blocks_16(Poly1305* ctx_p, const byte* m_p, word32 len_p, int notLast_p) +#else +void poly1305_arm32_blocks_16(Poly1305* ctx, const byte* m, word32 len, + int notLast) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p; register const byte* m asm ("r1") = (const byte*)m_p; register word32 len asm ("r2") = (word32)len_p; register int notLast asm ("r3") = (int)notLast_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #28\n\t" @@ -282,12 +293,22 @@ static const word32 L_poly1305_arm32_clamp[] = { 0x0fffffff, 0x0ffffffc, 0x0ffffffc, 0x0ffffffc, }; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void poly1305_set_key(Poly1305* ctx_p, const byte* key_p) +#else +void poly1305_set_key(Poly1305* ctx, const byte* key) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p; register const byte* key asm ("r1") = (const byte*)key_p; register word32* L_poly1305_arm32_clamp_c asm ("r2") = (word32*)&L_poly1305_arm32_clamp; +#else + register word32* L_poly1305_arm32_clamp_c = + (word32*)&L_poly1305_arm32_clamp; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Load mask. 
*/ @@ -328,10 +349,16 @@ void poly1305_set_key(Poly1305* ctx_p, const byte* key_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void poly1305_final(Poly1305* ctx_p, byte* mac_p) +#else +void poly1305_final(Poly1305* ctx, byte* mac) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p; register byte* mac asm ("r1") = (byte*)mac_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "add r9, %[ctx], #16\n\t" @@ -385,13 +412,20 @@ void poly1305_final(Poly1305* ctx_p, byte* mac_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void poly1305_arm32_blocks_16(Poly1305* ctx_p, const byte* m_p, word32 len_p, int notLast_p) +#else +void poly1305_arm32_blocks_16(Poly1305* ctx, const byte* m, word32 len, + int notLast) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p; register const byte* m asm ("r1") = (const byte*)m_p; register word32 len asm ("r2") = (word32)len_p; register int notLast asm ("r3") = (int)notLast_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #28\n\t" @@ -610,12 +644,18 @@ void poly1305_arm32_blocks_16(Poly1305* ctx_p, const byte* m_p, word32 len_p, ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void poly1305_arm32_blocks(Poly1305* ctx_p, const unsigned char* m_p, size_t bytes_p) +#else +void poly1305_arm32_blocks(Poly1305* ctx, const unsigned char* m, size_t bytes) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p; register const unsigned char* m asm ("r1") = (const unsigned char*)m_p; register size_t bytes asm ("r2") = (size_t)bytes_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "cmp %[bytes], #16\n\t" @@ -1074,12 +1114,22 @@ static const word32 L_poly1305_arm32_clamp[] = { 0x0fffffff, 0x0ffffffc, 0x0ffffffc, 0x0ffffffc, }; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void poly1305_set_key(Poly1305* ctx_p, const byte* key_p) +#else +void poly1305_set_key(Poly1305* ctx, const byte* key) +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p; register const byte* key asm ("r1") = (const byte*)key_p; register word32* L_poly1305_arm32_clamp_c asm ("r2") = (word32*)&L_poly1305_arm32_clamp; +#else + register word32* L_poly1305_arm32_clamp_c = + (word32*)&L_poly1305_arm32_clamp; + +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Load mask. 
  */
@@ -1300,10 +1350,16 @@ void poly1305_set_key(Poly1305* ctx_p, const byte* key_p)
     );
 }
 
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 void poly1305_final(Poly1305* ctx_p, byte* mac_p)
+#else
+void poly1305_final(Poly1305* ctx, byte* mac)
+#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */
 {
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
     register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p;
     register byte* mac asm ("r1") = (byte*)mac_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "add r9, %[ctx], #16\n\t"
diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c
index 1ec907402..4d24a43e4 100644
--- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c
@@ -44,11 +44,15 @@
 #ifdef __IAR_SYSTEMS_ICC__
 #define __asm__ asm
 #define __volatile__ volatile
+#define WOLFSSL_NO_VAR_ASSIGN_REG
 #endif /* __IAR_SYSTEMS_ICC__ */
 #ifdef __KEIL__
 #define __asm__ __asm
 #define __volatile__ volatile
 #endif /* __KEIL__ */
+#ifdef __ghs__
+#define WOLFSSL_NO_VAR_ASSIGN_REG
+#endif /* __ghs__ */
 #ifndef NO_SHA256
 #include
@@ -74,16 +78,27 @@ static const word32 L_SHA256_transform_len_k[] = {
 void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p,
     word32 len_p);
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p,
     word32 len_p)
+#else
+void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
+#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */
 {
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
     register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p;
     register const byte* data asm ("r1") = (const byte*)data_p;
     register word32 len asm ("r2") = (word32)len_p;
     register word32* L_SHA256_transform_len_k_c asm ("r3") =
         (word32*)&L_SHA256_transform_len_k;
+#else
+    register word32* L_SHA256_transform_len_k_c =
+        (word32*)&L_SHA256_transform_len_k;
+
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "sub sp, sp, #0xc0\n\t"
+        "mov r3, %[L_SHA256_transform_len_k]\n\t"
         /* Copy digest to add in at end */
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
         "ldm r0, {r4, r5}\n\t"
@@ -1760,13 +1775,23 @@ static const word32 L_SHA256_transform_neon_len_k[] = {
 void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p,
     word32 len_p);
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p,
     word32 len_p)
+#else
+void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
+#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */
 {
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
     register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p;
     register const byte* data asm ("r1") = (const byte*)data_p;
     register word32 len asm ("r2") = (word32)len_p;
     register word32* L_SHA256_transform_neon_len_k_c asm ("r3") =
         (word32*)&L_SHA256_transform_neon_len_k;
+#else
+    register word32* L_SHA256_transform_neon_len_k_c =
+        (word32*)&L_SHA256_transform_neon_len_k;
+
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "sub sp, sp, #24\n\t"
diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c
index 0caad5e90..68887eeef 100644
--- a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c
@@ -44,11 +44,15 @@
 #ifdef __IAR_SYSTEMS_ICC__
 #define __asm__ asm
 #define __volatile__ volatile
+#define WOLFSSL_NO_VAR_ASSIGN_REG
 #endif /* __IAR_SYSTEMS_ICC__ */
 #ifdef __KEIL__
 #define __asm__ __asm
 #define __volatile__ volatile
 #endif /* __KEIL__ */
+#ifdef __ghs__
+#define WOLFSSL_NO_VAR_ASSIGN_REG
+#endif /* __ghs__ */
 #ifdef WOLFSSL_SHA3
 #ifndef WOLFSSL_ARMASM_NO_NEON
 static const word64 L_sha3_arm2_neon_rt[] = {
@@ -68,14 +72,24 @@ static const word64 L_sha3_arm2_neon_rt[] = {
 #include
 
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 void BlockSha3(word64* state_p)
+#else
+void BlockSha3(word64* state)
+#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */
 {
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
     register word64* state asm ("r0") = (word64*)state_p;
     register word64* L_sha3_arm2_neon_rt_c asm ("r1") =
         (word64*)&L_sha3_arm2_neon_rt;
+#else
+    register word64* L_sha3_arm2_neon_rt_c = (word64*)&L_sha3_arm2_neon_rt;
+
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "sub sp, sp, #16\n\t"
+        "mov r1, %[L_sha3_arm2_neon_rt]\n\t"
         "mov r2, #24\n\t"
         "mov r3, sp\n\t"
         "vld1.8 {d0-d3}, [%[state]]!\n\t"
@@ -361,13 +375,23 @@ static const word64 L_sha3_arm2_rt[] = {
 #include
 
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 void BlockSha3(word64* state_p)
+#else
+void BlockSha3(word64* state)
+#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */
 {
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
     register word64* state asm ("r0") = (word64*)state_p;
     register word64* L_sha3_arm2_rt_c asm ("r1") = (word64*)&L_sha3_arm2_rt;
+#else
+    register word64* L_sha3_arm2_rt_c = (word64*)&L_sha3_arm2_rt;
+
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "sub sp, sp, #0xcc\n\t"
+        "mov r1, %[L_sha3_arm2_rt]\n\t"
         "mov r2, #12\n\t"
         "\n"
     "L_sha3_arm32_begin_%=: \n\t"
diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c
index ab02ae1fd..5bcf77ec9 100644
--- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c
@@ -44,11 +44,15 @@
 #ifdef __IAR_SYSTEMS_ICC__
 #define __asm__ asm
 #define __volatile__ volatile
+#define WOLFSSL_NO_VAR_ASSIGN_REG
 #endif /* __IAR_SYSTEMS_ICC__ */
 #ifdef __KEIL__
 #define __asm__ __asm
 #define __volatile__ volatile
 #endif /* __KEIL__ */
+#ifdef __ghs__
+#define WOLFSSL_NO_VAR_ASSIGN_REG
+#endif /* __ghs__ */
 #if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384)
 #include
@@ -98,16 +102,27 @@ static const word64 L_SHA512_transform_len_k[] = {
 void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p,
     word32 len_p);
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p,
     word32 len_p)
+#else
+void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
+#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */
 {
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
     register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p;
     register const byte* data asm ("r1") = (const byte*)data_p;
     register word32 len asm ("r2") = (word32)len_p;
     register word64* L_SHA512_transform_len_k_c asm ("r3") =
         (word64*)&L_SHA512_transform_len_k;
+#else
+    register word64* L_SHA512_transform_len_k_c =
+        (word64*)&L_SHA512_transform_len_k;
+
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "sub sp, sp, #0xc0\n\t"
+        "mov r3, %[L_SHA512_transform_len_k]\n\t"
         /* Copy digest to add in at end */
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
         "ldm r0, {r4, r5}\n\t"
@@ -7576,15 +7591,26 @@ static const word64 L_SHA512_transform_neon_len_k[] = {
 void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p,
     word32 len_p);
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p,
     word32 len_p)
+#else
+void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
+#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */
 {
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
     register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p;
     register const byte* data asm ("r1") = (const byte*)data_p;
     register word32 len asm ("r2") = (word32)len_p;
     register word64* L_SHA512_transform_neon_len_k_c asm ("r3") =
         (word64*)&L_SHA512_transform_neon_len_k;
+#else
+    register word64* L_SHA512_transform_neon_len_k_c =
+        (word64*)&L_SHA512_transform_neon_len_k;
+
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
+        "mov r3, %[L_SHA512_transform_neon_len_k]\n\t"
         /* Load digest into working vars */
         "vldm.64 %[sha512], {d0-d7}\n\t"
         /* Start of loop processing a block */
diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm.S b/wolfcrypt/src/port/arm/thumb2-aes-asm.S
index cc9bfb479..2a34a893c 100644
--- a/wolfcrypt/src/port/arm/thumb2-aes-asm.S
+++ b/wolfcrypt/src/port/arm/thumb2-aes-asm.S
@@ -2290,12 +2290,12 @@ L_AES_ECB_decrypt_end:
     .type AES_CBC_decrypt, %function
 AES_CBC_decrypt:
     PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-    LDR r8, [sp, #36]
-    LDR r4, [sp, #40]
     MOV lr, r0
     LDR r0, L_AES_Thumb2_td_ecb
     MOV r12, r2
     ADR r2, L_AES_Thumb2_td4
+    LDR r8, [sp, #36]
+    LDR r4, [sp, #40]
     PUSH {r3, r4}
     CMP r8, #0xa
 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
index 7372dcdb6..1bcc76cd4 100644
--- a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
+++ b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
@@ -214,6 +214,11 @@ void AES_invert_key(unsigned char* ks, word32 rounds)
     register word32* L_AES_Thumb2_td_c __asm__ ("r3") =
         (word32*)L_AES_Thumb2_td;
 
+#else
+    register word32* L_AES_Thumb2_te_c = (word32*)L_AES_Thumb2_te;
+
+    register word32* L_AES_Thumb2_td_c = (word32*)L_AES_Thumb2_td;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
@@ -359,6 +364,11 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len,
     register word32* L_AES_Thumb2_rcon_c __asm__ ("r4") =
         (word32*)&L_AES_Thumb2_rcon;
 
+#else
+    register word32* L_AES_Thumb2_te_c = (word32*)L_AES_Thumb2_te;
+
+    register word32* L_AES_Thumb2_rcon_c = (word32*)&L_AES_Thumb2_rcon;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
@@ -893,6 +903,9 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
     register word32* L_AES_Thumb2_te_ecb_c __asm__ ("r5") =
         (word32*)L_AES_Thumb2_te_ecb;
 
+#else
+    register word32* L_AES_Thumb2_te_ecb_c = (word32*)L_AES_Thumb2_te_ecb;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
@@ -901,7 +914,7 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
 #ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         "MOV r12, r4\n\t"
 #else
-        "LDR r12, [sp, #36]\n\t"
+        "MOV r12, %[nr]\n\t"
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         "PUSH {%[ks]}\n\t"
         "CMP r12, #0xa\n\t"
@@ -1115,18 +1128,21 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
     register word32* L_AES_Thumb2_te_ecb_c __asm__ ("r6") =
         (word32*)L_AES_Thumb2_te_ecb;
 
+#else
+    register word32* L_AES_Thumb2_te_ecb_c = (word32*)L_AES_Thumb2_te_ecb;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
 #ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         "MOV r8, r4\n\t"
 #else
-        "LDR r8, [sp, #36]\n\t"
+        "MOV r8, %[nr]\n\t"
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 #ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         "MOV r9, r5\n\t"
 #else
-        "LDR r9, [sp, #40]\n\t"
+        "MOV r9, %[iv]\n\t"
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         "MOV lr, %[in]\n\t"
         "MOV r0, %[L_AES_Thumb2_te_ecb]\n\t"
@@ -1356,18 +1372,21 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out,
     register word32* L_AES_Thumb2_te_ecb_c __asm__ ("r6") =
         (word32*)L_AES_Thumb2_te_ecb;
 
+#else
+    register word32* L_AES_Thumb2_te_ecb_c = (word32*)L_AES_Thumb2_te_ecb;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
 #ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         "MOV r12, r4\n\t"
 #else
-        "LDR r12, [sp, #36]\n\t"
+        "MOV r12, %[nr]\n\t"
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 #ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         "MOV r8, r5\n\t"
 #else
-        "LDR r8, [sp, #40]\n\t"
+        "MOV r8, %[ctr]\n\t"
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         "MOV lr, %[in]\n\t"
         "MOV r0, %[L_AES_Thumb2_te_ecb]\n\t"
@@ -1889,8 +1908,12 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
     register word32* L_AES_Thumb2_td_ecb_c __asm__ ("r5") =
         (word32*)L_AES_Thumb2_td_ecb;
 
-    register byte* L_AES_Thumb2_td4_c __asm__ ("r6") =
-        (byte*)&L_AES_Thumb2_td4;
+    register byte* L_AES_Thumb2_td4_c __asm__ ("r6") = (byte*)&L_AES_Thumb2_td4;
+
+#else
+    register word32* L_AES_Thumb2_td_ecb_c = (word32*)L_AES_Thumb2_td_ecb;
+
+    register byte* L_AES_Thumb2_td4_c = (byte*)&L_AES_Thumb2_td4;
 
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
@@ -1898,7 +1921,7 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
 #ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         "MOV r8, r4\n\t"
 #else
-        "LDR r8, [sp, #36]\n\t"
+        "MOV r8, %[nr]\n\t"
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         "MOV lr, %[in]\n\t"
         "MOV r0, %[L_AES_Thumb2_td_ecb]\n\t"
@@ -2111,26 +2134,30 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
     register word32* L_AES_Thumb2_td_ecb_c __asm__ ("r6") =
         (word32*)L_AES_Thumb2_td_ecb;
 
-    register byte* L_AES_Thumb2_td4_c __asm__ ("r7") =
-        (byte*)&L_AES_Thumb2_td4;
+    register byte* L_AES_Thumb2_td4_c __asm__ ("r7") = (byte*)&L_AES_Thumb2_td4;
+
+#else
+    register word32* L_AES_Thumb2_td_ecb_c = (word32*)L_AES_Thumb2_td_ecb;
+
+    register byte* L_AES_Thumb2_td4_c = (byte*)&L_AES_Thumb2_td4;
 
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
-#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
-        "MOV r8, r4\n\t"
-#else
-        "LDR r8, [sp, #36]\n\t"
-#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
-#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
-        "MOV r4, r5\n\t"
-#else
-        "LDR r4, [sp, #40]\n\t"
-#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         "MOV lr, %[in]\n\t"
         "MOV r0, %[L_AES_Thumb2_td_ecb]\n\t"
         "MOV r12, %[len]\n\t"
         "MOV r2, %[L_AES_Thumb2_td4]\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+        "MOV r8, r4\n\t"
+#else
+        "MOV r8, %[nr]\n\t"
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+        "MOV r4, r5\n\t"
+#else
+        "MOV r4, %[iv]\n\t"
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         "PUSH {%[ks], r4}\n\t"
         "CMP r8, #0xa\n\t"
 #if defined(__GNUC__)
@@ -2524,6 +2551,9 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
     register word32* L_GCM_gmult_len_r_c __asm__ ("r4") =
         (word32*)&L_GCM_gmult_len_r;
 
+#else
+    register word32* L_GCM_gmult_len_r_c = (word32*)&L_GCM_gmult_len_r;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
@@ -3117,18 +3147,21 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out,
     register word32* L_AES_Thumb2_te_gcm_c __asm__ ("r6") =
         (word32*)L_AES_Thumb2_te_gcm;
 
+#else
+    register word32* L_AES_Thumb2_te_gcm_c = (word32*)L_AES_Thumb2_te_gcm;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
 #ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         "MOV r12, r4\n\t"
 #else
-        "LDR r12, [sp, #36]\n\t"
+        "MOV r12, %[nr]\n\t"
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 #ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         "MOV r8, r5\n\t"
 #else
-        "LDR r8, [sp, #40]\n\t"
+        "MOV r8, %[ctr]\n\t"
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         "MOV lr, %[in]\n\t"
         "MOV r0, %[L_AES_Thumb2_te_gcm]\n\t"
diff --git a/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c b/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c
index b33ce26ac..2f1a51604 100644
--- a/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c
+++ b/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c
@@ -95,6 +95,10 @@ void wc_chacha_setkey(word32* x, const byte* key, word32 keySz)
     register word32* L_chacha_thumb2_constants_c __asm__ ("r3") =
         (word32*)&L_chacha_thumb2_constants;
 
+#else
+    register word32* L_chacha_thumb2_constants_c =
+        (word32*)&L_chacha_thumb2_constants;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
diff --git a/wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c b/wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c
index a693a91ea..ff93f626b 100644
--- a/wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c
+++ b/wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c
@@ -77,6 +77,10 @@ void mlkem_thumb2_ntt(sword16* r)
     register word16* L_mlkem_thumb2_ntt_zetas_c __asm__ ("r1") =
         (word16*)&L_mlkem_thumb2_ntt_zetas;
 
+#else
+    register word16* L_mlkem_thumb2_ntt_zetas_c =
+        (word16*)&L_mlkem_thumb2_ntt_zetas;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
@@ -1396,6 +1400,10 @@ void mlkem_thumb2_invntt(sword16* r)
     register word16* L_mlkem_invntt_zetas_inv_c __asm__ ("r1") =
         (word16*)&L_mlkem_invntt_zetas_inv;
 
+#else
+    register word16* L_mlkem_invntt_zetas_inv_c =
+        (word16*)&L_mlkem_invntt_zetas_inv;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
@@ -3085,6 +3093,10 @@ void mlkem_thumb2_basemul_mont(sword16* r, const sword16* a, const sword16* b)
     register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r3") =
         (word16*)&L_mlkem_basemul_mont_zetas;
 
+#else
+    register word16* L_mlkem_basemul_mont_zetas_c =
+        (word16*)&L_mlkem_basemul_mont_zetas;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
@@ -3232,6 +3244,10 @@ void mlkem_thumb2_basemul_mont_add(sword16* r, const sword16* a,
     register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r3") =
         (word16*)&L_mlkem_basemul_mont_zetas;
 
+#else
+    register word16* L_mlkem_basemul_mont_zetas_c =
+        (word16*)&L_mlkem_basemul_mont_zetas;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
@@ -3387,6 +3403,10 @@ void mlkem_thumb2_csubq(sword16* p)
     register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r1") =
         (word16*)&L_mlkem_basemul_mont_zetas;
 
+#else
+    register word16* L_mlkem_basemul_mont_zetas_c =
+        (word16*)&L_mlkem_basemul_mont_zetas;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
@@ -3501,6 +3521,10 @@ unsigned int mlkem_thumb2_rej_uniform(sword16* p, unsigned int len,
     register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r4") =
         (word16*)&L_mlkem_basemul_mont_zetas;
 
+#else
+    register word16* L_mlkem_basemul_mont_zetas_c =
+        (word16*)&L_mlkem_basemul_mont_zetas;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
diff --git a/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c b/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c
index 1bcf57b83..00c01ab51 100644
--- a/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c
+++ b/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c
@@ -315,6 +315,10 @@ void poly1305_set_key(Poly1305* ctx, const byte* key)
     register word32* L_poly1305_thumb2_clamp_c __asm__ ("r2") =
         (word32*)&L_poly1305_thumb2_clamp;
 
+#else
+    register word32* L_poly1305_thumb2_clamp_c =
+        (word32*)&L_poly1305_thumb2_clamp;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
diff --git a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c
index 9804332fa..d0f7ce35b 100644
--- a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c
+++ b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c
@@ -81,6 +81,10 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
     register word32* L_SHA256_transform_len_k_c __asm__ ("r3") =
         (word32*)&L_SHA256_transform_len_k;
 
+#else
+    register word32* L_SHA256_transform_len_k_c =
+        (word32*)&L_SHA256_transform_len_k;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
diff --git a/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c
index 0fa620401..329678aa4 100644
--- a/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c
+++ b/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c
@@ -73,6 +73,9 @@ void BlockSha3(word64* state)
     register word64* L_sha3_thumb2_rt_c __asm__ ("r1") =
         (word64*)&L_sha3_thumb2_rt;
 
+#else
+    register word64* L_sha3_thumb2_rt_c = (word64*)&L_sha3_thumb2_rt;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
diff --git a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c
index 637f2894d..d5a94c430 100644
--- a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c
+++ b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c
@@ -105,6 +105,10 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
     register word64* L_SHA512_transform_len_k_c __asm__ ("r3") =
         (word64*)&L_SHA512_transform_len_k;
 
+#else
+    register word64* L_SHA512_transform_len_k_c =
+        (word64*)&L_SHA512_transform_len_k;
+
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (

From ea677dd30dde313e6cc3c3969dc81693110034ed Mon Sep 17 00:00:00 2001
From: Sean Parkinson
Date: Thu, 27 Mar 2025 10:51:01 +1000
Subject: [PATCH 2/3] ARM32 inline ASM: make all vars input when not
 assigning regs

The compiler doesn't keep parameters in the registers they were passed
in when those parameters are listed as output operands, so pass them as
inputs instead.
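A minimal standalone sketch of the operand issue follows; this is not
code from the patch itself (demo_store and its arguments are made up for
illustration). A variable named only in the input list ("r") merely has
to be available in some register on entry, and the compiler may keep
relying on it afterwards. A variable named as an in/out output operand
("+r") must be assumed modified by the asm, so the compiler will not
keep the parameter in the register it was passed in.

#include <stdint.h>

/* Illustration only: operand direction changes what the compiler may
 * assume about the variables after the asm block. */
void demo_store(uint32_t* p, uint32_t a)
{
    __asm__ __volatile__ (
        "str %[a], [%[p]]\n\t"      /* asm only reads p and a */
        :                           /* no outputs */
        : [p] "r" (p), [a] "r" (a)  /* inputs: promised unmodified */
        : "memory"                  /* the pointed-to word is written */
    );
    /* Had p and a been listed as "+r" outputs instead, the compiler
     * would treat them as changed here and could no longer assume they
     * still hold the values, or sit in the registers, that the
     * parameters arrived in. */
}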
---
 wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c   |  72 ++++++
 .../src/port/arm/armv8-32-chacha-asm_c.c      |  22 ++
 .../src/port/arm/armv8-32-curve25519_c.c      | 205 ++++++++++++++++++
 wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c |  36 +++
 .../src/port/arm/armv8-32-poly1305-asm_c.c    |  39 ++++
 .../src/port/arm/armv8-32-sha256-asm_c.c      |  12 +
 wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c  |  11 +
 .../src/port/arm/armv8-32-sha512-asm_c.c      |  12 +
 8 files changed, 409 insertions(+)

diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
index 22d9397fd..3297faaf0 100644
--- a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
@@ -420,10 +420,17 @@ void AES_invert_key(unsigned char* ks, word32 rounds)
         "str r8, [%[ks]], #4\n\t"
         "subs r11, r11, #1\n\t"
         "bne L_AES_invert_key_mix_loop_%=\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [ks] "+r" (ks), [rounds] "+r" (rounds),
           [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c),
           [L_AES_ARM32_td] "+r" (L_AES_ARM32_td_c)
         :
+#else
+        :
+        : [ks] "r" (ks), [rounds] "r" (rounds),
+          [L_AES_ARM32_te] "r" (L_AES_ARM32_te_c),
+          [L_AES_ARM32_td] "r" (L_AES_ARM32_td_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9",
             "r10", "r11"
     );
@@ -956,10 +963,17 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len,
         "bne L_AES_set_encrypt_key_loop_128_%=\n\t"
         "\n"
     "L_AES_set_encrypt_key_end_%=: \n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks),
           [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c),
           [L_AES_ARM32_rcon] "+r" (L_AES_ARM32_rcon_c)
         :
+#else
+        :
+        : [key] "r" (key), [len] "r" (len), [ks] "r" (ks),
+          [L_AES_ARM32_te] "r" (L_AES_ARM32_te_c),
+          [L_AES_ARM32_rcon] "r" (L_AES_ARM32_rcon_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8"
     );
 }
@@ -1617,8 +1631,13 @@ void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks)
         "eor r5, r5, r9\n\t"
         "eor r6, r6, r10\n\t"
         "eor r7, r7, r11\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [te] "+r" (te), [nr] "+r" (nr), [len] "+r" (len), [ks] "+r" (ks)
         :
+#else
+        :
+        : [te] "r" (te), [nr] "r" (nr), [len] "r" (len), [ks] "r" (ks)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr"
     );
 }
@@ -1884,9 +1903,15 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
         "\n"
     "L_AES_ECB_encrypt_end_%=: \n\t"
         "pop {%[ks]}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks),
           [nr] "+r" (nr), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c)
         :
+#else
+        :
+        : [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks),
+          [nr] "r" (nr), [L_AES_ARM32_te_ecb] "r" (L_AES_ARM32_te_ecb_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
     );
 }
@@ -2175,10 +2200,17 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
         "\n"
     "L_AES_CBC_encrypt_end_%=: \n\t"
         "pop {%[ks], r9}\n\t"
         "stm r9, {r4, r5, r6, r7}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks),
           [nr] "+r" (nr), [iv] "+r" (iv),
           [L_AES_ARM32_te_cbc] "+r" (L_AES_ARM32_te_cbc_c)
         :
+#else
+        :
+        : [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks),
+          [nr] "r" (nr), [iv] "r" (iv),
+          [L_AES_ARM32_te_cbc] "r" (L_AES_ARM32_te_cbc_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
     );
 }
@@ -2468,10 +2500,17 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out,
         "rev r7, r7\n\t"
 #endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
         "stm r8, {r4, r5, r6, r7}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks),
           [nr] "+r" (nr), [ctr] "+r" (ctr),
           [L_AES_ARM32_te_ctr] "+r" (L_AES_ARM32_te_ctr_c)
         :
+#else
+        :
+        : [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks),
+          [nr] "r" (nr), [ctr] "r" (ctr),
+          [L_AES_ARM32_te_ctr] "r" (L_AES_ARM32_te_ctr_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
     );
 }
@@ -3130,8 +3169,13 @@ void AES_decrypt_block(const word32* td, int nr, const byte* td4)
         "eor r5, r5, r9\n\t"
         "eor r6, r6, r10\n\t"
         "eor r7, r7, r11\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [td] "+r" (td), [nr] "+r" (nr), [td4] "+r" (td4)
         :
+#else
+        :
+        : [td] "r" (td), [nr] "r" (nr), [td4] "r" (td4)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr"
     );
 }
@@ -3431,10 +3475,17 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
         "bne L_AES_ECB_decrypt_loop_block_128_%=\n\t"
         "\n"
     "L_AES_ECB_decrypt_end_%=: \n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks),
           [nr] "+r" (nr), [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c),
           [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c)
         :
+#else
+        :
+        : [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks),
+          [nr] "r" (nr), [L_AES_ARM32_td_ecb] "r" (L_AES_ARM32_td_ecb_c),
+          [L_AES_ARM32_td4] "r" (L_AES_ARM32_td4_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
     );
 }
@@ -4086,11 +4137,19 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
         "\n"
     "L_AES_CBC_decrypt_end_%=: \n\t"
         "pop {%[ks]-r4}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks),
           [nr] "+r" (nr), [iv] "+r" (iv),
           [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c),
           [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c)
         :
+#else
+        :
+        : [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks),
+          [nr] "r" (nr), [iv] "r" (iv),
+          [L_AES_ARM32_td_ecb] "r" (L_AES_ARM32_td_ecb_c),
+          [L_AES_ARM32_td4] "r" (L_AES_ARM32_td4_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r8", "r9", "r10", "r11"
     );
 }
@@ -4701,9 +4760,15 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
         "subs %[len], %[len], #16\n\t"
         "add %[data], %[data], #16\n\t"
         "bne L_GCM_gmult_len_start_block_%=\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len),
           [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c)
         :
+#else
+        :
+        : [x] "r" (x), [m] "r" (m), [data] "r" (data), [len] "r" (len),
+          [L_GCM_gmult_len_r] "r" (L_GCM_gmult_len_r_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10",
             "r11"
     );
@@ -4983,10 +5048,17 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out,
         "rev r7, r7\n\t"
 #endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
         "stm r8, {r4, r5, r6, r7}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks),
           [nr] "+r" (nr), [ctr] "+r" (ctr),
           [L_AES_ARM32_te_gcm] "+r" (L_AES_ARM32_te_gcm_c)
         :
+#else
+        :
+        : [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks),
+          [nr] "r" (nr), [ctr] "r" (ctr),
+          [L_AES_ARM32_te_gcm] "r" (L_AES_ARM32_te_gcm_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
     );
 }
diff --git a/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c
index 10568dba1..a192eecef 100644
--- a/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c
@@ -80,8 +80,13 @@ void wc_chacha_setiv(word32* x, const byte* iv, word32 counter)
         "rev lr, lr\n\t"
 #endif /* BIG_ENDIAN_ORDER */
         "stm r3, {r4, r12, lr}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [x] "+r" (x), [iv] "+r" (iv), [counter] "+r" (counter)
         :
+#else
+        :
+        : [x] "r" (x), [iv] "r" (iv), [counter] "r" (counter)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r3", "r12", "lr", "r4"
     );
 }
@@ -139,9 +144,15 @@ void wc_chacha_setkey(word32* x, const byte* key, word32 keySz)
         "\n"
     "L_chacha_arm32_setkey_same_keyb_ytes_%=: \n\t"
         "stm %[x], {r4, r5, r12, lr}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz),
           [L_chacha_arm32_constants] "+r" (L_chacha_arm32_constants_c)
         :
+#else
+        :
+        : [x] "r" (x), [key] "r" (key), [keySz] "r" (keySz),
+          [L_chacha_arm32_constants] "r" (L_chacha_arm32_constants_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r4", "r5"
     );
 }
@@ -510,8 +521,13 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len)
         "\n"
     "L_chacha_arm32_crypt_done_%=: \n\t"
         "add sp, sp, #52\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [ctx] "+r" (ctx), [c] "+r" (c), [m] "+r" (m), [len] "+r" (len)
         :
+#else
+        :
+        : [ctx] "r" (ctx), [c] "r" (c), [m] "r" (m), [len] "r" (len)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9",
             "r10", "r11"
     );
@@ -589,9 +605,15 @@ void wc_chacha_use_over(byte* over, byte* output, const byte* input, word32 len)
         "b L_chacha_arm32_over_byte_loop_%=\n\t"
         "\n"
     "L_chacha_arm32_over_done_%=: \n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [over] "+r" (over), [output] "+r" (output), [input] "+r" (input),
           [len] "+r" (len)
         :
+#else
+        :
+        : [over] "r" (over), [output] "r" (output), [input] "r" (input),
+          [len] "r" (len)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9"
     );
 }
diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c
index 363dd12bf..e8f528e8c 100644
--- a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c
@@ -74,8 +74,13 @@ void fe_init()
 #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
     __asm__ __volatile__ (
         "\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         :
         :
+#else
+        :
+        :
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc"
     );
 }
@@ -288,8 +293,13 @@ void fe_add_sub_op()
         "sbc r11, r11, #0\n\t"
         "stm r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
         /* Done Add-Sub */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         :
         :
+#else
+        :
+        :
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr"
     );
 }
@@ -336,8 +346,13 @@ void fe_sub_op()
         "sbc lr, lr, #0\n\t"
         "stm r0, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t"
         /* Done Sub */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         :
         :
+#else
+        :
+        :
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr"
     );
 }
@@ -356,8 +371,13 @@ void fe_sub(fe r, const fe a, const fe b)
     __asm__ __volatile__ (
         "bl fe_sub_op\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
         :
+#else
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
             "r11", "r12", "lr"
     );
@@ -406,8 +426,13 @@ void fe_add_op()
         "adc lr, lr, #0\n\t"
         "stm r0, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t"
         /* Done Add */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         :
         :
+#else
+        :
+        :
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr"
     );
 }
@@ -426,8 +451,13 @@ void fe_add(fe r, const fe a, const fe b)
     __asm__ __volatile__ (
         "bl fe_add_op\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
         :
+#else
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
             "r11", "r12", "lr"
     );
@@ -467,8 +497,13 @@ void fe_frombytes(fe out, const unsigned char* in)
         "str r7, [%[out], #20]\n\t"
         "str r8, [%[out], #24]\n\t"
         "str r9, [%[out], #28]\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [out] "+r" (out), [in] "+r" (in)
         :
+#else
+        :
+        : [out] "r" (out), [in] "r" (in)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
     );
 }
@@ -517,8 +552,13 @@ void fe_tobytes(unsigned char* out, const fe n)
         "str r7, [%[out], #20]\n\t"
         "str r8, [%[out], #24]\n\t"
         "str r9, [%[out], #28]\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [out] "+r" (out), [n] "+r" (n)
         :
+#else
+        :
+        : [out] "r" (out), [n] "r" (n)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12"
     );
 }
@@ -544,8 +584,13 @@ void fe_1(fe n)
         "mov r8, #0\n\t"
         "mov r9, #0\n\t"
         "stm %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [n] "+r" (n)
         :
+#else
+        :
+        : [n] "r" (n)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
     );
 }
@@ -571,8 +616,13 @@ void fe_0(fe n)
         "mov r8, #0\n\t"
         "mov r9, #0\n\t"
         "stm %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [n] "+r" (n)
         :
+#else
+        :
+        : [n] "r" (n)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
     );
 }
@@ -636,8 +686,13 @@ void fe_copy(fe r, const fe a)
 #else
         "strd r4, r5, [%[r], #24]\n\t"
 #endif
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [a] "+r" (a)
         :
+#else
+        :
+        : [r] "r" (r), [a] "r" (a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r4", "r5"
     );
 }
@@ -669,8 +724,13 @@ void fe_neg(fe r, const fe a)
         "sbcs r4, lr, r4\n\t"
         "sbc r5, r12, r5\n\t"
         "stm %[r]!, {r2, r3, r4, r5}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [a] "+r" (a)
         :
+#else
+        :
+        : [r] "r" (r), [a] "r" (a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r4", "r5", "r12", "lr"
     );
 }
@@ -717,8 +777,13 @@ int fe_isnonzero(const fe a)
         "orr r4, r4, r6\n\t"
         "orr r2, r2, r8\n\t"
         "orr %[a], r2, r4\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [a] "+r" (a)
         :
+#else
+        :
+        : [a] "r" (a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8",
             "r9", "r12"
     );
@@ -750,8 +815,13 @@ int fe_isnegative(const fe a)
         "and %[a], r2, #1\n\t"
         "lsr r1, r1, #31\n\t"
         "eor %[a], %[a], r1\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [a] "+r" (a)
         :
+#else
+        :
+        : [a] "r" (a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r1", "r2", "r3", "r4", "r5"
     );
     return (word32)(size_t)a;
@@ -2291,8 +2361,13 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
 #else
         "strd r8, r9, [%[r], #88]\n\t"
 #endif
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b)
         :
+#else
+        :
+        : [r] "r" (r), [base] "r" (base), [b] "r" (b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r3", "r10",
             "r11", "r12", "lr"
     );
@@ -2418,8 +2493,13 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
         "and r7, r7, lr\n\t"
         "stm %[r]!, {r4, r5, r6, r7}\n\t"
         "sub %[base], %[base], %[b]\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b)
         :
+#else
+        :
+        : [r] "r" (r), [base] "r" (base), [b] "r" (b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
             "r11", "r12", "lr"
     );
@@ -2812,8 +2892,13 @@ void fe_mul_op()
         "ldr r0, [sp, #36]\n\t"
         "stm r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t"
         "add sp, sp, #40\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         :
         :
+#else
+        :
+        :
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr"
     );
 }
@@ -2961,8 +3046,13 @@ void fe_mul_op()
         /* Store */
         "stm lr, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
         "add sp, sp, #16\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         :
         :
+#else
+        :
+        :
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr"
     );
 }
@@ -2982,8 +3072,13 @@ void fe_mul(fe r, const fe a, const fe b)
     __asm__ __volatile__ (
         "bl fe_mul_op\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
         :
+#else
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
             "r11", "r12", "lr"
     );
@@ -3266,8 +3361,13 @@ void fe_sq_op()
         "ldr r0, [sp, #64]\n\t"
         "stm r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t"
         "add sp, sp, #0x44\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         :
         :
+#else
+        :
+        :
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr"
     );
 }
@@ -3401,8 +3501,13 @@ void fe_sq_op()
         "pop {lr}\n\t"
         /* Store */
         "stm lr, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         :
         :
+#else
+        :
+        :
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr"
     );
 }
@@ -3421,8 +3526,13 @@ void fe_sq(fe r, const fe a)
     __asm__ __volatile__ (
         "bl fe_sq_op\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [a] "+r" (a)
         :
+#else
+        :
+        : [r] "r" (r), [a] "r" (a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9",
             "r12", "lr", "r10", "r11"
     );
@@ -3501,8 +3611,13 @@ void fe_mul121666(fe r, fe a)
         "adcs r8, r8, #0\n\t"
         "adc r9, r9, #0\n\t"
         "stm %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [a] "+r" (a)
         :
+#else
+        :
+        : [r] "r" (r), [a] "r" (a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9",
             "r12", "lr", "r10"
     );
@@ -3567,8 +3682,13 @@ void fe_mul121666(fe r, fe a)
         "adcs r8, r8, #0\n\t"
         "adc r9, r9, #0\n\t"
         "stm %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [a] "+r" (a)
         :
+#else
+        :
+        : [r] "r" (r), [a] "r" (a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9",
             "r12", "lr", "r10"
     );
@@ -3964,8 +4084,13 @@ int curve25519(byte* r, const byte* n, const byte* a)
         "bl fe_mul_op\n\t"
         "mov r0, #0\n\t"
         "add sp, sp, #0xbc\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a)
         :
+#else
+        :
+        : [r] "r" (r), [n] "r" (n), [a] "r" (a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
             "r3", "r12", "lr"
     );
@@ -4284,8 +4409,13 @@ int curve25519(byte* r, const byte* n, const byte* a)
         "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
         "mov r0, #0\n\t"
         "add sp, sp, #0xc0\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a)
         :
+#else
+        :
+        : [r] "r" (r), [n] "r" (n), [a] "r" (a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
             "r3", "r12", "lr"
     );
@@ -4465,8 +4595,13 @@ void fe_invert(fe r, const fe a)
         "ldr %[a], [sp, #132]\n\t"
         "ldr %[r], [sp, #128]\n\t"
         "add sp, sp, #0x88\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [a] "+r" (a)
         :
+#else
+        :
+        : [r] "r" (r), [a] "r" (a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7",
             "r8", "r9", "r10", "r11"
     );
@@ -4792,8 +4927,13 @@ void fe_sq2(fe r, const fe a)
         "ldr r0, [sp, #64]\n\t"
         "stm r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t"
         "add sp, sp, #0x44\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [a] "+r" (a)
         :
+#else
+        :
+        : [r] "r" (r), [a] "r" (a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr"
     );
 }
@@ -4977,8 +5117,13 @@ void fe_sq2(fe r, const fe a)
         "stm r12, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
         "mov r0, r12\n\t"
         "mov r1, lr\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [a] "+r" (a)
         :
+#else
+        :
+        : [r] "r" (r), [a] "r" (a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr"
     );
 }
@@ -5154,8 +5299,13 @@ void fe_pow22523(fe r, const fe a)
         "ldr %[a], [sp, #100]\n\t"
         "ldr %[r], [sp, #96]\n\t"
         "add sp, sp, #0x68\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [a] "+r" (a)
         :
+#else
+        :
+        : [r] "r" (r), [a] "r" (a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7",
             "r8", "r9", "r10", "r11"
     );
@@ -5191,8 +5341,13 @@ void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p)
         "add r0, r0, #0x40\n\t"
         "bl fe_mul_op\n\t"
         "add sp, sp, #8\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [p] "+r" (p)
         :
+#else
+        :
+        : [r] "r" (r), [p] "r" (p)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr", "r2", "r3", "r12", "r4", "r5", "r6", "r7",
             "r8", "r9", "r10", "r11"
     );
@@ -5233,8 +5388,13 @@ void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p)
         "add r0, r0, #0x60\n\t"
         "bl fe_mul_op\n\t"
         "add sp, sp, #8\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [p] "+r" (p)
         :
+#else
+        :
+        : [r] "r" (r), [p] "r" (p)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "lr", "r2", "r3", "r12", "r4", "r5", "r6", "r7",
             "r8", "r9", "r10", "r11"
     );
@@ -5287,8 +5447,13 @@ void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p)
         "mov r1, r0\n\t"
         "bl fe_sub_op\n\t"
         "add sp, sp, #8\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [p] "+r" (p)
         :
+#else
+        :
+        : [r] "r" (r), [p] "r" (p)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9",
             "r10", "r11", "r12", "lr"
     );
@@ -5380,8 +5545,13 @@ void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
         "add r1, r0, #32\n\t"
         "bl fe_add_sub_op\n\t"
         "add sp, sp, #12\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
         :
+#else
+        :
+        : [r] "r" (r), [p] "r" (p), [q] "r" (q)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
             "r11", "r12", "lr"
     );
@@ -5474,8 +5644,13 @@ void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
         "add r0, r0, #32\n\t"
         "bl fe_add_sub_op\n\t"
         "add sp, sp, #12\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
         :
+#else
+        :
+        : [r] "r" (r), [p] "r" (p), [q] "r" (q)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
             "r11", "r12", "lr"
     );
@@ -5568,8 +5743,13 @@ void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
         "add r0, r0, #32\n\t"
         "bl fe_add_sub_op\n\t"
         "add sp, sp, #44\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
         :
+#else
+        :
+        : [r] "r" (r), [p] "r" (p), [q] "r" (q)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
             "r11", "r12", "lr"
     );
@@ -5662,8 +5842,13 @@ void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
         "add r0, r0, #0x40\n\t"
         "bl fe_add_sub_op\n\t"
         "add sp, sp, #44\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
         :
+#else
+        :
+        : [r] "r" (r), [p] "r" (p), [q] "r" (q)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
             "r11", "r12", "lr"
     );
@@ -6457,8 +6642,13 @@ void sc_reduce(byte* s)
         "ldr %[s], [sp, #52]\n\t"
         "stm %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
         "add sp, sp, #56\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [s] "+r" (s)
         :
+#else
+        :
+        : [s] "r" (s)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8",
             "r9", "r10", "r11", "r12", "lr"
     );
@@ -7123,8 +7313,13 @@ void sc_reduce(byte* s)
         "ldr %[s], [sp, #52]\n\t"
         "stm %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
         "add sp, sp, #56\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [s] "+r" (s)
         :
+#else
+        :
+        : [s] "r" (s)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8",
             "r9", "r10", "r11", "r12", "lr"
     );
@@ -8280,8 +8475,13 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
         "str r8, [%[s], #24]\n\t"
         "str r9, [%[s], #28]\n\t"
         "add sp, sp, #0x50\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c)
         :
+#else
+        :
+        : [s] "r" (s), [a] "r" (a), [b] "r" (b), [c] "r" (c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
             "r12", "lr"
     );
@@ -9079,8 +9279,13 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
         "str r8, [%[s], #24]\n\t"
         "str r9, [%[s], #28]\n\t"
         "add sp, sp, #0x50\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c)
         :
+#else
+        :
+        : [s] "r" (s), [a] "r" (a), [b] "r" (b), [c] "r" (c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
             "r12", "lr"
     );
diff --git a/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c
index 7a03ed262..6442922eb 100644
--- a/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c
@@ -3095,9 +3095,15 @@ void mlkem_arm32_ntt(sword16* r)
         "add %[r], %[r], #32\n\t"
         "bne L_mlkem_arm32_ntt_loop_567_%=\n\t"
         "add sp, sp, #8\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r),
           [L_mlkem_arm32_ntt_zetas] "+r" (L_mlkem_arm32_ntt_zetas_c)
         :
+#else
+        :
+        : [r] "r" (r),
+          [L_mlkem_arm32_ntt_zetas] "r" (L_mlkem_arm32_ntt_zetas_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7",
             "r8", "r9", "r10", "r11"
     );
@@ -7536,9 +7542,15 @@ void mlkem_arm32_invntt(sword16* r)
         "add %[r], %[r], #4\n\t"
         "bne L_mlkem_invntt_loop_321_%=\n\t"
         "add sp, sp, #8\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r),
           [L_mlkem_invntt_zetas_inv] "+r" (L_mlkem_invntt_zetas_inv_c)
         :
+#else
+        :
+        : [r] "r" (r),
+          [L_mlkem_invntt_zetas_inv] "r" (L_mlkem_invntt_zetas_inv_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7",
             "r8", "r9", "r10", "r11"
     );
@@ -7870,9 +7882,15 @@ void mlkem_arm32_basemul_mont(sword16* r, const sword16* a, const sword16* b)
         "stm %[r]!, {r4, r5}\n\t"
         "pop {r8}\n\t"
         "bne L_mlkem_basemul_mont_loop_%=\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b),
           [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
         :
+#else
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b),
+          [L_mlkem_basemul_mont_zetas] "r" (L_mlkem_basemul_mont_zetas_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9",
             "r10", "r11"
     );
@@ -8204,9 +8222,15 @@ void mlkem_arm32_basemul_mont_add(sword16* r, const sword16* a,
         "stm %[r]!, {r4, r5}\n\t"
         "pop {r8}\n\t"
         "bne L_mlkem_arm32_basemul_mont_add_loop_%=\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b),
           [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
         :
+#else
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b),
+          [L_mlkem_basemul_mont_zetas] "r" (L_mlkem_basemul_mont_zetas_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9",
             "r10", "r11"
     );
@@ -8393,9 +8417,15 @@ void mlkem_arm32_csubq(sword16* p)
         "stm %[p]!, {r2, r3, r4, r5}\n\t"
         "subs r1, r1, #8\n\t"
         "bne L_mlkem_arm32_csubq_loop_%=\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [p] "+r" (p),
           [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
         :
+#else
+        :
+        : [p] "r" (p),
+          [L_mlkem_basemul_mont_zetas] "r" (L_mlkem_basemul_mont_zetas_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7",
             "r8", "r9", "r10", "r11"
     );
@@ -8676,9 +8706,15 @@ unsigned int mlkem_arm32_rej_uniform(sword16* p, unsigned int len,
         "\n"
     "L_mlkem_arm32_rej_uniform_done_%=: \n\t"
         "lsr r0, r12, #1\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen),
           [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
         :
+#else
+        :
+        : [p] "r" (p), [len] "r" (len), [r] "r" (r), [rLen] "r" (rLen),
+          [L_mlkem_basemul_mont_zetas] "r" (L_mlkem_basemul_mont_zetas_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8"
     );
     return (word32)(size_t)p;
diff --git a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c
index 15dccec1f..528e62319 100644
--- a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c
@@ -281,9 +281,15 @@ void poly1305_arm32_blocks_16(Poly1305* ctx, const byte* m, word32 len,
         "\n"
     "L_poly1305_arm32_16_done_%=: \n\t"
         "add sp, sp, #28\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len),
           [notLast] "+r" (notLast)
         :
+#else
+        :
+        : [ctx] "r" (ctx), [m] "r" (m), [len] "r" (len),
+          [notLast] "r" (notLast)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9",
             "r10", "r11"
     );
@@ -342,9 +348,15 @@ void poly1305_set_key(Poly1305* ctx, const byte* key)
         "stm lr, {r5, r6, r7, r8, r12}\n\t"
         /* Zero leftover */
         "str r5, [%[ctx], #52]\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [ctx] "+r" (ctx), [key] "+r" (key),
           [L_poly1305_arm32_clamp] "+r" (L_poly1305_arm32_clamp_c)
         :
+#else
+        :
+        : [ctx] "r" (ctx), [key] "r" (key),
+          [L_poly1305_arm32_clamp] "r" (L_poly1305_arm32_clamp_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8"
     );
 }
@@ -404,8 +416,13 @@ void poly1305_final(Poly1305* ctx, byte* mac)
         /* Zero out padding. */
         "add r9, %[ctx], #36\n\t"
         "stm r9, {r4, r5, r6, r7}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [ctx] "+r" (ctx), [mac] "+r" (mac)
         :
+#else
+        :
+        : [ctx] "r" (ctx), [mac] "r" (mac)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7",
             "r8", "r9"
     );
@@ -636,9 +653,15 @@ void poly1305_arm32_blocks_16(Poly1305* ctx, const byte* m, word32 len,
         "\n"
     "L_poly1305_arm32_16_done_%=: \n\t"
         "add sp, sp, #28\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len),
           [notLast] "+r" (notLast)
         :
+#else
+        :
+        : [ctx] "r" (ctx), [m] "r" (m), [len] "r" (len),
+          [notLast] "r" (notLast)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9",
             "r10", "r11"
     );
@@ -1100,8 +1123,13 @@ void poly1305_arm32_blocks(Poly1305* ctx, const unsigned char* m, size_t bytes)
         "stm r12, {r7, r8, r9, r10, r11}\n\t"
         "\n"
     "L_poly1305_arm32_blocks_done_%=: \n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [ctx] "+r" (ctx), [m] "+r" (m), [bytes] "+r" (bytes)
         :
+#else
+        :
+        : [ctx] "r" (ctx), [m] "r" (m), [bytes] "r" (bytes)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8",
             "r9", "r10", "r11", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
             "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
             "d16", "d17", "d18",
@@ -1340,9 +1368,15 @@ void poly1305_set_key(Poly1305* ctx, const byte* key)
         "stm lr, {r4, r5, r6, r7, r8, r9}\n\t"
         /* Zero leftover */
         "str r5, [%[ctx], #56]\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [ctx] "+r" (ctx), [key] "+r" (key),
           [L_poly1305_arm32_clamp] "+r" (L_poly1305_arm32_clamp_c)
         :
+#else
+        :
+        : [ctx] "r" (ctx), [key] "r" (key),
+          [L_poly1305_arm32_clamp] "r" (L_poly1305_arm32_clamp_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8",
             "r9", "r10", "r11", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
             "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
             "d16", "d17", "d18",
@@ -1405,8 +1439,13 @@ void poly1305_final(Poly1305* ctx, byte* mac)
         /* Zero out padding. */
         "add r9, %[ctx], #40\n\t"
         "stm r9, {r4, r5, r6, r7}\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [ctx] "+r" (ctx), [mac] "+r" (mac)
         :
+#else
+        :
+        : [ctx] "r" (ctx), [mac] "r" (mac)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7",
             "r8", "r9"
     );
diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c
index 4d24a43e4..391be4c14 100644
--- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c
@@ -1742,9 +1742,15 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
         "add %[data], %[data], #0x40\n\t"
         "bne L_SHA256_transform_len_begin_%=\n\t"
         "add sp, sp, #0xc0\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len),
           [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c)
         :
+#else
+        :
+        : [sha256] "r" (sha256), [data] "r" (data), [len] "r" (len),
+          [L_SHA256_transform_len_k] "r" (L_SHA256_transform_len_k_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
             "r12"
     );
@@ -2814,9 +2820,15 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
         "str r10, [sp, #8]\n\t"
         "bne L_SHA256_transform_neon_len_begin_%=\n\t"
         "add sp, sp, #24\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len),
           [L_SHA256_transform_neon_len_k] "+r" (L_SHA256_transform_neon_len_k_c)
         :
+#else
+        :
+        : [sha256] "r" (sha256), [data] "r" (data), [len] "r" (len),
+          [L_SHA256_transform_neon_len_k] "r" (L_SHA256_transform_neon_len_k_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr",
             "r10", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8",
             "d9", "d10", "d11"
diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c
index 68887eeef..8ce9e853f 100644
--- a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c
@@ -346,9 +346,15 @@ void BlockSha3(word64* state)
         "vst1.8 {d20-d23}, [%[state]]!\n\t"
         "vst1.8 {d24}, [%[state]]\n\t"
         "add sp, sp, #16\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [state] "+r" (state),
           [L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c)
         :
+#else
+        :
+        : [state] "r" (state),
+          [L_sha3_arm2_neon_rt] "r" (L_sha3_arm2_neon_rt_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "d0", "d1", "d2", "d3", "d4", "d5",
             "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14",
             "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
            "d24", "d25",
@@ -2354,8 +2360,13 @@ void BlockSha3(word64* state)
         "subs r2, r2, #1\n\t"
         "bne L_sha3_arm32_begin_%=\n\t"
         "add sp, sp, #0xcc\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [state] "+r" (state), [L_sha3_arm2_rt] "+r" (L_sha3_arm2_rt_c)
         :
+#else
+        :
+        : [state] "r" (state), [L_sha3_arm2_rt] "r" (L_sha3_arm2_rt_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7",
             "r8", "r9", "r10", "r11"
     );
diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c
index 5bcf77ec9..fb7e07e93 100644
--- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c
@@ -7534,9 +7534,15 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
         "bne L_SHA512_transform_len_begin_%=\n\t"
         "eor r0, r0, r0\n\t"
         "add sp, sp, #0xc0\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len),
           [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c)
         :
+#else
+        :
+        : [sha512] "r" (sha512), [data] "r" (data), [len] "r" (len),
+          [L_SHA512_transform_len_k] "r" (L_SHA512_transform_len_k_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
             "r12"
     );
@@ -9099,9 +9105,15 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
         "subs %[len], %[len], #0x80\n\t"
         "sub r3, r3, #0x280\n\t"
         "bne L_SHA512_transform_neon_len_begin_%=\n\t"
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
         : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len),
           [L_SHA512_transform_neon_len_k] "+r" (L_SHA512_transform_neon_len_k_c)
         :
+#else
+        :
+        : [sha512] "r" (sha512), [data] "r" (data), [len] "r" (len),
+          [L_SHA512_transform_neon_len_k] "r" (L_SHA512_transform_neon_len_k_c)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
         : "memory", "cc", "r12", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
             "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
             "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"

From 21c0d7803aa032adf07bb247400ebe0affa4eb38 Mon Sep 17 00:00:00 2001
From: Sean Parkinson
Date: Thu, 27 Mar 2025 10:54:19 +1000
Subject: [PATCH 3/3] Greenhills compiler: fix asm and volatile

The Green Hills compiler doesn't accept volatile on inline assembly,
and __asm__ needs to be spelled __asm.
---
 wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c      | 2 ++
 wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c   | 2 ++
 wolfcrypt/src/port/arm/armv8-32-curve25519_c.c   | 2 ++
 wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c    | 2 ++
 wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c | 2 ++
 wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c   | 2 ++
 wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c     | 2 ++
 wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c   | 2 ++
 8 files changed, 16 insertions(+)
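For reference, the cumulative per-compiler shim that each
armv8-32-*-asm_c.c file carries after this series reads as sketched
below. It is assembled from the hunks in these three patches; the
alignment and comments are illustrative, not part of the diff.

/* Combined compiler shim after patches 1-3. */
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
#define WOLFSSL_NO_VAR_ASSIGN_REG   /* patch 1: no register-pinned vars */
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__ __asm
#define __volatile__ volatile
#endif /* __KEIL__ */
#ifdef __ghs__
#define __asm__ __asm               /* patch 3: GHS spells it __asm */
#define __volatile__                /* patch 3: GHS rejects volatile */
#define WOLFSSL_NO_VAR_ASSIGN_REG   /* patch 1: no register-pinned vars */
#endif /* __ghs__ */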
diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
index 3297faaf0..64cb22312 100644
--- a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
@@ -51,6 +51,8 @@
 #define __volatile__ volatile
 #endif /* __KEIL__ */
 #ifdef __ghs__
+#define __asm__ __asm
+#define __volatile__
 #define WOLFSSL_NO_VAR_ASSIGN_REG
 #endif /* __ghs__ */
 #ifndef NO_AES
diff --git a/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c
index a192eecef..05d14745b 100644
--- a/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c
@@ -51,6 +51,8 @@
 #define __volatile__ volatile
 #endif /* __KEIL__ */
 #ifdef __ghs__
+#define __asm__ __asm
+#define __volatile__
 #define WOLFSSL_NO_VAR_ASSIGN_REG
 #endif /* __ghs__ */
 #ifdef HAVE_CHACHA
diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c
index e8f528e8c..7636bbe30 100644
--- a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c
@@ -51,6 +51,8 @@
 #define __volatile__ volatile
 #endif /* __KEIL__ */
 #ifdef __ghs__
+#define __asm__ __asm
+#define __volatile__
 #define WOLFSSL_NO_VAR_ASSIGN_REG
 #endif /* __ghs__ */
 /* Based on work by: Emil Lenngren
diff --git a/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c
index 6442922eb..3a2ca1047 100644
--- a/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c
@@ -51,6 +51,8 @@
 #define __volatile__ volatile
 #endif /* __KEIL__ */
 #ifdef __ghs__
+#define __asm__ __asm
+#define __volatile__
 #define WOLFSSL_NO_VAR_ASSIGN_REG
 #endif /* __ghs__ */
 #include
diff --git a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c
index 528e62319..0f88a6e8b 100644
--- a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c
@@ -51,6 +51,8 @@
 #define __volatile__ volatile
 #endif /* __KEIL__ */
 #ifdef __ghs__
+#define __asm__ __asm
+#define __volatile__
 #define WOLFSSL_NO_VAR_ASSIGN_REG
 #endif /* __ghs__ */
 #ifdef HAVE_POLY1305
diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c
index 391be4c14..ec86f634a 100644
--- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c
@@ -51,6 +51,8 @@
 #define __volatile__ volatile
 #endif /* __KEIL__ */
 #ifdef __ghs__
+#define __asm__ __asm
+#define __volatile__
 #define WOLFSSL_NO_VAR_ASSIGN_REG
 #endif /* __ghs__ */
 #ifndef NO_SHA256
diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c
index 8ce9e853f..0e670c4de 100644
--- a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c
@@ -51,6 +51,8 @@
 #define __volatile__ volatile
 #endif /* __KEIL__ */
 #ifdef __ghs__
+#define __asm__ __asm
+#define __volatile__
 #define WOLFSSL_NO_VAR_ASSIGN_REG
 #endif /* __ghs__ */
 #ifdef WOLFSSL_SHA3
diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c
index fb7e07e93..cd20037ae 100644
--- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c
@@ -51,6 +51,8 @@
 #define __volatile__ volatile
 #endif /* __KEIL__ */
 #ifdef __ghs__
+#define __asm__ __asm
+#define __volatile__
 #define WOLFSSL_NO_VAR_ASSIGN_REG
 #endif /* __ghs__ */
 #if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384)