From cefe108cab5e2c6784cd29af39188c4db632056e Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Fri, 3 Nov 2023 08:45:10 +1000 Subject: [PATCH] Thumb2 inline ASM: IAR doesn't do register variables IAR doesn't parse register variable declarations with specified registers. IAR doesn't even honor the register keyword. A small negative value can be used, but IAR doesn't accept it; specify the positive value instead. Add a small code-size version of mont_reduce_full using umlal and umaal. Use the keyword '__asm__' instead of 'asm' in register variable declarations. Explicitly disable inlining of some functions when compiling with IAR. --- wolfcrypt/src/port/arm/thumb2-aes-asm_c.c | 193 +- wolfcrypt/src/port/arm/thumb2-curve25519_c.c | 397 +- wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c | 15 +- wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c | 15 +- wolfcrypt/src/sp_cortexm.c | 4409 ++++++++++++++---- wolfcrypt/src/sp_x86_64_asm.S | 4 +- wolfcrypt/src/sp_x86_64_asm.asm | 4 +- 7 files changed, 3956 insertions(+), 1081 deletions(-) diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c index 1564a6f9d..48b5edc16 100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c @@ -44,6 +44,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -198,12 +199,18 @@ static const uint32_t* L_AES_Thumb2_te = L_AES_Thumb2_te_data; #endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT void AES_invert_key(unsigned char* ks, word32 rounds); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_invert_key(unsigned char* ks_p, word32 rounds_p) +#else +void AES_invert_key(unsigned char* ks, word32 rounds) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register unsigned char* ks asm ("r0") = (unsigned char*)ks_p; - register word32 rounds asm ("r1") = (word32)rounds_p; - register uint32_t* L_AES_Thumb2_te_c asm ("r2") = (uint32_t*)L_AES_Thumb2_te; - register uint32_t* L_AES_Thumb2_td_c asm ("r3") = (uint32_t*)L_AES_Thumb2_td; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register unsigned char* ks __asm__ ("r0") = (unsigned char*)ks_p; + register word32 rounds __asm__ ("r1") = (word32)rounds_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_c __asm__ ("r2") = (uint32_t*)L_AES_Thumb2_te; + register uint32_t* L_AES_Thumb2_td_c __asm__ ("r3") = (uint32_t*)L_AES_Thumb2_td; __asm__ __volatile__ ( "MOV r12, %[L_AES_Thumb2_te]\n\t" @@ -314,13 +321,19 @@ static const uint32_t L_AES_Thumb2_rcon[] = { void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char* ks_p) +#else +void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* key asm ("r0") = (const unsigned char*)key_p; - register word32 len asm ("r1") = (word32)len_p; - register unsigned char* ks asm ("r2") = (unsigned char*)ks_p; - register uint32_t* L_AES_Thumb2_te_c asm ("r3") = (uint32_t*)L_AES_Thumb2_te; - register uint32_t* L_AES_Thumb2_rcon_c asm ("r4") = (uint32_t*)&L_AES_Thumb2_rcon; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* key __asm__ ("r0") = (const unsigned char*)key_p; + register word32 len __asm__ ("r1") = (word32)len_p; + 
register unsigned char* ks __asm__ ("r2") = (unsigned char*)ks_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_c __asm__ ("r3") = (uint32_t*)L_AES_Thumb2_te; + register uint32_t* L_AES_Thumb2_rcon_c __asm__ ("r4") = (uint32_t*)&L_AES_Thumb2_rcon; __asm__ __volatile__ ( "MOV r8, %[L_AES_Thumb2_te]\n\t" @@ -534,12 +547,18 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char } void AES_encrypt_block(const uint32_t* te, int nr, int len, const uint32_t* ks); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, const uint32_t* ks_p) +#else +void AES_encrypt_block(const uint32_t* te, int nr, int len, const uint32_t* ks) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const uint32_t* te asm ("r0") = (const uint32_t*)te_p; - register int nr asm ("r1") = (int)nr_p; - register int len asm ("r2") = (int)len_p; - register const uint32_t* ks asm ("r3") = (const uint32_t*)ks_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const uint32_t* te __asm__ ("r0") = (const uint32_t*)te_p; + register int nr __asm__ ("r1") = (int)nr_p; + register int len __asm__ ("r2") = (int)len_p; + register const uint32_t* ks __asm__ ("r3") = (const uint32_t*)ks_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n" @@ -762,14 +781,20 @@ static const uint32_t* L_AES_Thumb2_te_ecb = L_AES_Thumb2_te_data; #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) +#else +void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register uint32_t* L_AES_Thumb2_te_ecb_c asm ("r5") = (uint32_t*)L_AES_Thumb2_te_ecb; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_ecb_c __asm__ ("r5") = (uint32_t*)L_AES_Thumb2_te_ecb; __asm__ __volatile__ ( "MOV lr, %[in]\n\t" @@ -918,15 +943,21 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #ifdef HAVE_AES_CBC void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) +#else +void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) +#endif /* 
!WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register unsigned char* iv asm ("r5") = (unsigned char*)iv_p; - register uint32_t* L_AES_Thumb2_te_ecb_c asm ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; + register unsigned char* iv __asm__ ("r5") = (unsigned char*)iv_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_ecb_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; __asm__ __volatile__ ( "MOV r8, r4\n\t" @@ -1090,15 +1121,21 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #ifdef WOLFSSL_AES_COUNTER void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) +#else +void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p; - register uint32_t* L_AES_Thumb2_te_ecb_c asm ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; + register unsigned char* ctr __asm__ ("r5") = (unsigned char*)ctr_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_ecb_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; __asm__ __volatile__ ( "MOV r12, r4\n\t" @@ -1283,11 +1320,17 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #ifdef HAVE_AES_DECRYPT #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) void AES_decrypt_block(const uint32_t* td, int nr, const uint8_t* td4); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_decrypt_block(const uint32_t* td_p, int nr_p, const uint8_t* td4_p) +#else +void AES_decrypt_block(const uint32_t* td, int nr, const uint8_t* td4) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const uint32_t* td asm ("r0") = (const uint32_t*)td_p; - register int nr asm ("r1") = (int)nr_p; - register const uint8_t* td4 asm ("r2") = 
(const uint8_t*)td4_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const uint32_t* td __asm__ ("r0") = (const uint32_t*)td_p; + register int nr __asm__ ("r1") = (int)nr_p; + register const uint8_t* td4 __asm__ ("r2") = (const uint8_t*)td4_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n" @@ -1543,15 +1586,21 @@ static const unsigned char L_AES_Thumb2_td4[] = { #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) +#else +void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register uint32_t* L_AES_Thumb2_td_ecb_c asm ("r5") = (uint32_t*)L_AES_Thumb2_td_ecb; - register unsigned char* L_AES_Thumb2_td4_c asm ("r6") = (unsigned char*)&L_AES_Thumb2_td4; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_td_ecb_c __asm__ ("r5") = (uint32_t*)L_AES_Thumb2_td_ecb; + register unsigned char* L_AES_Thumb2_td4_c __asm__ ("r6") = (unsigned char*)&L_AES_Thumb2_td4; __asm__ __volatile__ ( "MOV r8, r4\n\t" @@ -1697,16 +1746,22 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #ifdef HAVE_AES_CBC void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) +#else +void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register unsigned char* iv asm ("r5") = (unsigned char*)iv_p; - register uint32_t* L_AES_Thumb2_td_ecb_c asm ("r6") = (uint32_t*)L_AES_Thumb2_td_ecb; - register unsigned char* L_AES_Thumb2_td4_c asm ("r7") = (unsigned char*)&L_AES_Thumb2_td4; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks 
__asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; + register unsigned char* iv __asm__ ("r5") = (unsigned char*)iv_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_td_ecb_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_td_ecb; + register unsigned char* L_AES_Thumb2_td4_c __asm__ ("r7") = (unsigned char*)&L_AES_Thumb2_td4; __asm__ __volatile__ ( "MOV r8, r4\n\t" @@ -2033,13 +2088,19 @@ static const uint32_t L_GCM_gmult_len_r[] = { void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned char* data_p, unsigned long len_p) +#else +void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register unsigned char* x asm ("r0") = (unsigned char*)x_p; - register const unsigned char** m asm ("r1") = (const unsigned char**)m_p; - register const unsigned char* data asm ("r2") = (const unsigned char*)data_p; - register unsigned long len asm ("r3") = (unsigned long)len_p; - register uint32_t* L_GCM_gmult_len_r_c asm ("r4") = (uint32_t*)&L_GCM_gmult_len_r; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register unsigned char* x __asm__ ("r0") = (unsigned char*)x_p; + register const unsigned char** m __asm__ ("r1") = (const unsigned char**)m_p; + register const unsigned char* data __asm__ ("r2") = (const unsigned char*)data_p; + register unsigned long len __asm__ ("r3") = (unsigned long)len_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_GCM_gmult_len_r_c __asm__ ("r4") = (uint32_t*)&L_GCM_gmult_len_r; __asm__ __volatile__ ( "MOV lr, %[L_GCM_gmult_len_r]\n\t" @@ -2603,15 +2664,21 @@ void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned static const uint32_t* L_AES_Thumb2_te_gcm = L_AES_Thumb2_te_data; void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) +#else +void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p; - register uint32_t* L_AES_Thumb2_te_gcm_c asm ("r6") = (uint32_t*)L_AES_Thumb2_te_gcm; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; + register unsigned char* ctr __asm__ ("r5") = (unsigned char*)ctr_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_gcm_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_te_gcm; 
__asm__ __volatile__ ( "MOV r12, r4\n\t" diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c index 2018b8e9d..edb2af068 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c +++ b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c @@ -44,6 +44,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -60,8 +61,14 @@ #if defined(HAVE_CURVE25519) || defined(HAVE_ED25519) #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_init() +#else +void fe_init() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n\t" : @@ -71,8 +78,14 @@ void fe_init() } void fe_add_sub_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_add_sub_op() +#else +void fe_add_sub_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Add-Sub */ "LDRD r4, r5, [r2]\n\t" @@ -168,8 +181,14 @@ void fe_add_sub_op() } void fe_sub_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sub_op() +#else +void fe_sub_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Sub */ "LDM r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" @@ -202,11 +221,17 @@ void fe_sub_op() ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sub(fe r_p, const fe a_p, const fe b_p) +#else +void fe_sub(fe r, const fe a, const fe b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; - register const sword32* b asm ("r2") = (const sword32*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; + register const sword32* b __asm__ ("r2") = (const sword32*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "BL fe_sub_op\n\t" @@ -217,8 +242,14 @@ void fe_sub(fe r_p, const fe a_p, const fe b_p) } void fe_add_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_add_op() +#else +void fe_add_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Add */ "LDM r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" @@ -251,11 +282,17 @@ void fe_add_op() ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_add(fe r_p, const fe a_p, const fe b_p) +#else +void fe_add(fe r, const fe a, const fe b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; - register const sword32* b asm ("r2") = (const sword32*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; + register const sword32* b __asm__ ("r2") = (const sword32*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "BL fe_add_op\n\t" @@ -266,10 +303,16 @@ void fe_add(fe r_p, const fe a_p, const fe b_p) } #ifdef HAVE_ED25519 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_frombytes(fe out_p, const unsigned char* in_p) +#else +void fe_frombytes(fe out, const unsigned char* in) +#endif /* 
!WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* out asm ("r0") = (sword32*)out_p; - register const unsigned char* in asm ("r1") = (const unsigned char*)in_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* out __asm__ ("r0") = (sword32*)out_p; + register const unsigned char* in __asm__ ("r1") = (const unsigned char*)in_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR r2, [%[in]]\n\t" @@ -295,10 +338,16 @@ void fe_frombytes(fe out_p, const unsigned char* in_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_tobytes(unsigned char* out_p, const fe n_p) +#else +void fe_tobytes(unsigned char* out, const fe n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register unsigned char* out asm ("r0") = (unsigned char*)out_p; - register const sword32* n asm ("r1") = (const sword32*)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register unsigned char* out __asm__ ("r0") = (unsigned char*)out_p; + register const sword32* n __asm__ ("r1") = (const sword32*)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" @@ -335,9 +384,15 @@ void fe_tobytes(unsigned char* out_p, const fe n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_1(fe n_p) +#else +void fe_1(fe n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* n asm ("r0") = (sword32*)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* n __asm__ ("r0") = (sword32*)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Set one */ @@ -356,9 +411,15 @@ void fe_1(fe n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_0(fe n_p) +#else +void fe_0(fe n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* n asm ("r0") = (sword32*)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* n __asm__ ("r0") = (sword32*)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Set zero */ @@ -377,10 +438,16 @@ void fe_0(fe n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_copy(fe r_p, const fe a_p) +#else +void fe_copy(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Copy */ @@ -398,10 +465,16 @@ void fe_copy(fe r_p, const fe a_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_neg(fe r_p, const fe a_p) +#else +void fe_neg(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MVN r7, #0x0\n\t" @@ -425,9 +498,15 @@ void fe_neg(fe r_p, const fe a_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int fe_isnonzero(const fe a_p) +#else +int fe_isnonzero(const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sword32* a asm ("r0") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sword32* a __asm__ ("r0") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" @@ -464,9 +543,15 @@ int fe_isnonzero(const fe a_p) return 
(uint32_t)(size_t)a; } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int fe_isnegative(const fe a_p) +#else +int fe_isnegative(const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sword32* a asm ("r0") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sword32* a __asm__ ("r0") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r2, r3, r4, r5}\n\t" @@ -492,11 +577,17 @@ int fe_isnegative(const fe a_p) #if defined(HAVE_ED25519_MAKE_KEY) || defined(HAVE_ED25519_SIGN) #ifndef WC_NO_CACHE_RESISTANT +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) +#else +void fe_cmov_table(fe* r, fe* base, signed char b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register fe* r asm ("r0") = (fe*)r_p; - register fe* base asm ("r1") = (fe*)base_p; - register signed char b asm ("r2") = (signed char)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register fe* r __asm__ ("r0") = (fe*)r_p; + register fe* base __asm__ ("r1") = (fe*)base_p; + register signed char b __asm__ ("r2") = (signed char)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SXTB %[b], %[b]\n\t" @@ -1469,11 +1560,17 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) +#else +void fe_cmov_table(fe* r, fe* base, signed char b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register fe* r asm ("r0") = (fe*)r_p; - register fe* base asm ("r1") = (fe*)base_p; - register signed char b asm ("r2") = (signed char)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register fe* r __asm__ ("r0") = (fe*)r_p; + register fe* base __asm__ ("r1") = (fe*)base_p; + register signed char b __asm__ ("r2") = (signed char)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SXTB %[b], %[b]\n\t" @@ -1579,8 +1676,14 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) #endif /* HAVE_ED25519 */ #ifdef WOLFSSL_SP_NO_UMAAL void fe_mul_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul_op() +#else +void fe_mul_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x28\n\t" "STR r0, [sp, #36]\n\t" @@ -1959,8 +2062,14 @@ void fe_mul_op() #else void fe_mul_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul_op() +#else +void fe_mul_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" "STRD r0, r1, [sp, #36]\n\t" @@ -2092,11 +2201,17 @@ void fe_mul_op() } #endif /* WOLFSSL_SP_NO_UMAAL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul(fe r_p, const fe a_p, const fe b_p) +#else +void fe_mul(fe r, const fe a, const fe b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; - register const sword32* b asm ("r2") = (const sword32*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; + register const sword32* b __asm__ ("r2") = (const sword32*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "BL fe_mul_op\n\t" @@ -2108,8 +2223,14 @@ void fe_mul(fe r_p, const fe a_p, const fe b_p) #ifdef WOLFSSL_SP_NO_UMAAL void fe_sq_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq_op() 
+#else +void fe_sq_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" "STR r0, [sp, #64]\n\t" @@ -2381,8 +2502,14 @@ void fe_sq_op() #else void fe_sq_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq_op() +#else +void fe_sq_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x20\n\t" "STR r0, [sp, #28]\n\t" @@ -2500,10 +2627,16 @@ void fe_sq_op() } #endif /* WOLFSSL_SP_NO_UMAAL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq(fe r_p, const fe a_p) +#else +void fe_sq(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "BL fe_sq_op\n\t" @@ -2515,10 +2648,16 @@ void fe_sq(fe r_p, const fe a_p) #ifdef HAVE_CURVE25519 #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul121666(fe r_p, fe a_p) +#else +void fe_mul121666(fe r, fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register sword32* a asm ("r1") = (sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register sword32* a __asm__ ("r1") = (sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Multiply by 121666 */ @@ -2568,10 +2707,16 @@ void fe_mul121666(fe r_p, fe a_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul121666(fe r_p, fe a_p) +#else +void fe_mul121666(fe r, fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register sword32* a asm ("r1") = (sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register sword32* a __asm__ ("r1") = (sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Multiply by 121666 */ @@ -2609,11 +2754,17 @@ void fe_mul121666(fe r_p, fe a_p) #endif /* WOLFSSL_SP_NO_UMAAL */ #ifndef WC_NO_CACHE_RESISTANT +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int curve25519(byte* r_p, const byte* n_p, const byte* a_p) +#else +int curve25519(byte* r, const byte* n, const byte* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* r asm ("r0") = (byte*)r_p; - register const byte* n asm ("r1") = (const byte*)n_p; - register const byte* a asm ("r2") = (const byte*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* r __asm__ ("r0") = (byte*)r_p; + register const byte* n __asm__ ("r1") = (const byte*)n_p; + register const byte* a __asm__ ("r2") = (const byte*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0xbc\n\t" @@ -3039,11 +3190,17 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int curve25519(byte* r_p, const byte* n_p, const byte* a_p) +#else +int curve25519(byte* r, const byte* n, const byte* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* r asm ("r0") = (byte*)r_p; - register const byte* n asm ("r1") = (const byte*)n_p; - register const byte* a asm ("r2") = (const byte*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* r __asm__ ("r0") = (byte*)r_p; + 
register const byte* n __asm__ ("r1") = (const byte*)n_p; + register const byte* a __asm__ ("r2") = (const byte*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0xc0\n\t" @@ -3387,10 +3544,16 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) #endif /* WC_NO_CACHE_RESISTANT */ #endif /* HAVE_CURVE25519 */ #ifdef HAVE_ED25519 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_invert(fe r_p, const fe a_p) +#else +void fe_invert(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x88\n\t" @@ -3590,10 +3753,16 @@ void fe_invert(fe r_p, const fe a_p) } #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq2(fe r_p, const fe a_p) +#else +void fe_sq2(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -3898,10 +4067,16 @@ void fe_sq2(fe r_p, const fe a_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq2(fe r_p, const fe a_p) +#else +void fe_sq2(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x24\n\t" @@ -4055,10 +4230,16 @@ void fe_sq2(fe r_p, const fe a_p) } #endif /* WOLFSSL_SP_NO_UMAAL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_pow22523(fe r_p, const fe a_p) +#else +void fe_pow22523(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x68\n\t" @@ -4257,10 +4438,16 @@ void fe_pow22523(fe r_p, const fe a_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p) +#else +void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p2 * r asm ("r0") = (ge_p2 *)r_p; - register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p2 * r __asm__ ("r0") = (ge_p2 *)r_p; + register const ge_p1p1 * p __asm__ ("r1") = (const ge_p1p1 *)p_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x8\n\t" @@ -4287,10 +4474,16 @@ void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p) +#else +void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register 
ge_p3 * r asm ("r0") = (ge_p3 *)r_p; - register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p3 * r __asm__ ("r0") = (ge_p3 *)r_p; + register const ge_p1p1 * p __asm__ ("r1") = (const ge_p1p1 *)p_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x8\n\t" @@ -4322,10 +4515,16 @@ void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p) +#else +void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p2 * p asm ("r1") = (const ge_p2 *)p_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p2 * p __asm__ ("r1") = (const ge_p2 *)p_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x8\n\t" @@ -4369,11 +4568,17 @@ void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) +#else +void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_precomp * q __asm__ ("r2") = (const ge_precomp *)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0xc\n\t" @@ -4451,11 +4656,17 @@ void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) +#else +void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_precomp * q __asm__ ("r2") = (const ge_precomp *)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0xc\n\t" @@ -4534,11 +4745,17 @@ void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) +#else +void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_cached* q asm ("r2") = (const ge_cached*)q_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_cached* q __asm__ ("r2") = (const ge_cached*)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" @@ -4617,11 +4834,17 @@ void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_sub(ge_p1p1 * r_p, const ge_p3 
* p_p, const ge_cached* q_p) +#else +void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_cached* q asm ("r2") = (const ge_cached*)q_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_cached* q __asm__ ("r2") = (const ge_cached*)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" @@ -4701,9 +4924,15 @@ void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) } #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_reduce(byte* s_p) +#else +void sc_reduce(byte* s) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* s asm ("r0") = (byte*)s_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* s __asm__ ("r0") = (byte*)s_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x38\n\t" @@ -5129,9 +5358,15 @@ void sc_reduce(byte* s_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_reduce(byte* s_p) +#else +void sc_reduce(byte* s) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* s asm ("r0") = (byte*)s_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* s __asm__ ("r0") = (byte*)s_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x38\n\t" @@ -5430,12 +5665,18 @@ void sc_reduce(byte* s_p) #endif /* WOLFSSL_SP_NO_UMAAL */ #ifdef HAVE_ED25519_SIGN #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) +#else +void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* s asm ("r0") = (byte*)s_p; - register const byte* a asm ("r1") = (const byte*)a_p; - register const byte* b asm ("r2") = (const byte*)b_p; - register const byte* c asm ("r3") = (const byte*)c_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* s __asm__ ("r0") = (byte*)s_p; + register const byte* a __asm__ ("r1") = (const byte*)a_p; + register const byte* b __asm__ ("r2") = (const byte*)b_p; + register const byte* c __asm__ ("r3") = (const byte*)c_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x50\n\t" @@ -6218,12 +6459,18 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) +#else +void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* s asm ("r0") = (byte*)s_p; - register const byte* a asm ("r1") = (const byte*)a_p; - register const byte* b asm ("r2") = (const byte*)b_p; - register const byte* c asm ("r3") = (const byte*)c_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* s __asm__ ("r0") = (byte*)s_p; + register const byte* a __asm__ ("r1") = (const byte*)a_p; + register const byte* b __asm__ ("r2") = (const byte*)b_p; + register const byte* c __asm__ ("r3") = (const byte*)c_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x50\n\t" diff --git a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c index 43659fb07..2483f036d 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c +++ 
b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c @@ -44,6 +44,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -73,12 +74,18 @@ static const uint32_t L_SHA256_transform_len_k[] = { }; void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) +#else +void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p; - register const byte* data asm ("r1") = (const byte*)data_p; - register word32 len asm ("r2") = (word32)len_p; - register uint32_t* L_SHA256_transform_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_len_k; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register wc_Sha256* sha256 __asm__ ("r0") = (wc_Sha256*)sha256_p; + register const byte* data __asm__ ("r1") = (const byte*)data_p; + register word32 len __asm__ ("r2") = (word32)len_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_SHA256_transform_len_k_c __asm__ ("r3") = (uint32_t*)&L_SHA256_transform_len_k; __asm__ __volatile__ ( "SUB sp, sp, #0xc0\n\t" diff --git a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c index d62a035cc..3dc2d1f20 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c @@ -44,6 +44,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -97,12 +98,18 @@ static const uint64_t L_SHA512_transform_len_k[] = { }; void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) +#else +void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p; - register const byte* data asm ("r1") = (const byte*)data_p; - register word32 len asm ("r2") = (word32)len_p; - register uint64_t* L_SHA512_transform_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_len_k; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register wc_Sha512* sha512 __asm__ ("r0") = (wc_Sha512*)sha512_p; + register const byte* data __asm__ ("r1") = (const byte*)data_p; + register word32 len __asm__ ("r2") = (word32)len_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint64_t* L_SHA512_transform_len_k_c __asm__ ("r3") = (uint64_t*)&L_SHA512_transform_len_k; __asm__ __volatile__ ( "SUB sp, sp, #0xc0\n\t" diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index 95c7820d7..a44fcfe51 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -240,17 +240,26 @@ static void sp_2048_to_bin_64(sp_digit* r, byte* a) #ifndef WOLFSSL_SP_SMALL #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x24\n\t" @@ -593,115 +602,129 @@ static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "STRD %[r], %[a], [sp, #36]\n\t" +#else + "STR %[r], [sp, #36]\n\t" + "STR %[a], [sp, #40]\n\t" +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[b]\n\t" - "LDM %[a], {%[r], %[a], %[b], r3}\n\t" + "LDM %[a], {r0, r1, r2, r3}\n\t" "LDM lr!, {r4, r5, r6}\n\t" - "UMULL r10, r11, %[r], r4\n\t" - "UMULL r12, r7, %[a], r4\n\t" - "UMAAL r11, r12, %[r], r5\n\t" - "UMULL r8, r9, %[b], r4\n\t" - "UMAAL r12, r8, %[a], r5\n\t" - "UMAAL r12, r7, %[r], r6\n\t" + "UMULL r10, r11, r0, r4\n\t" + "UMULL r12, r7, r1, r4\n\t" + "UMAAL r11, r12, r0, r5\n\t" + "UMULL r8, r9, r2, r4\n\t" + "UMAAL r12, r8, r1, r5\n\t" + "UMAAL r12, r7, r0, r6\n\t" "UMAAL r8, r9, r3, r4\n\t" "STM sp, {r10, r11, r12}\n\t" - "UMAAL r7, r8, %[b], r5\n\t" + "UMAAL r7, r8, r2, r5\n\t" "LDM lr!, {r4}\n\t" - "UMULL r10, r11, %[a], r6\n\t" - "UMAAL r8, r9, %[b], r6\n\t" - "UMAAL r7, r10, %[r], r4\n\t" + "UMULL r10, r11, r1, r6\n\t" + "UMAAL r8, r9, r2, r6\n\t" + "UMAAL r7, r10, r0, r4\n\t" "UMAAL r8, r11, r3, r5\n\t" "STR r7, [sp, #12]\n\t" - "UMAAL r8, r10, %[a], r4\n\t" + "UMAAL r8, r10, r1, r4\n\t" "UMAAL r9, r11, r3, r6\n\t" - "UMAAL r9, r10, %[b], r4\n\t" + "UMAAL r9, r10, r2, r4\n\t" "UMAAL r10, r11, r3, r4\n\t" "LDM lr, {r4, r5, r6, r7}\n\t" "MOV r12, #0x0\n\t" - "UMLAL r8, r12, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r4\n\t" - "UMAAL r10, r12, %[b], r4\n\t" + "UMLAL r8, r12, r0, r4\n\t" + "UMAAL r9, r12, r1, r4\n\t" + "UMAAL r10, r12, r2, r4\n\t" "UMAAL r11, r12, r3, r4\n\t" "MOV r4, #0x0\n\t" - "UMLAL r9, r4, %[r], r5\n\t" - "UMAAL r10, r4, %[a], r5\n\t" - "UMAAL r11, r4, %[b], r5\n\t" + "UMLAL r9, r4, r0, r5\n\t" + "UMAAL r10, r4, r1, r5\n\t" + "UMAAL r11, r4, r2, r5\n\t" "UMAAL r12, r4, r3, r5\n\t" "MOV r5, 
#0x0\n\t" - "UMLAL r10, r5, %[r], r6\n\t" - "UMAAL r11, r5, %[a], r6\n\t" - "UMAAL r12, r5, %[b], r6\n\t" + "UMLAL r10, r5, r0, r6\n\t" + "UMAAL r11, r5, r1, r6\n\t" + "UMAAL r12, r5, r2, r6\n\t" "UMAAL r4, r5, r3, r6\n\t" "MOV r6, #0x0\n\t" - "UMLAL r11, r6, %[r], r7\n\t" - "LDR %[r], [sp, #40]\n\t" - "UMAAL r12, r6, %[a], r7\n\t" - "ADD %[r], %[r], #0x10\n\t" - "UMAAL r4, r6, %[b], r7\n\t" + "UMLAL r11, r6, r0, r7\n\t" + "LDR r0, [sp, #40]\n\t" + "UMAAL r12, r6, r1, r7\n\t" + "ADD r0, r0, #0x10\n\t" + "UMAAL r4, r6, r2, r7\n\t" "SUB lr, lr, #0x10\n\t" "UMAAL r5, r6, r3, r7\n\t" - "LDM %[r], {%[r], %[a], %[b], r3}\n\t" + "LDM r0, {r0, r1, r2, r3}\n\t" "STR r6, [sp, #32]\n\t" "LDM lr!, {r6}\n\t" "MOV r7, #0x0\n\t" - "UMLAL r8, r7, %[r], r6\n\t" - "UMAAL r9, r7, %[a], r6\n\t" + "UMLAL r8, r7, r0, r6\n\t" + "UMAAL r9, r7, r1, r6\n\t" "STR r8, [sp, #16]\n\t" - "UMAAL r10, r7, %[b], r6\n\t" + "UMAAL r10, r7, r2, r6\n\t" "UMAAL r11, r7, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r8, #0x0\n\t" - "UMLAL r9, r8, %[r], r6\n\t" - "UMAAL r10, r8, %[a], r6\n\t" + "UMLAL r9, r8, r0, r6\n\t" + "UMAAL r10, r8, r1, r6\n\t" "STR r9, [sp, #20]\n\t" - "UMAAL r11, r8, %[b], r6\n\t" + "UMAAL r11, r8, r2, r6\n\t" "UMAAL r12, r8, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r9, #0x0\n\t" - "UMLAL r10, r9, %[r], r6\n\t" - "UMAAL r11, r9, %[a], r6\n\t" + "UMLAL r10, r9, r0, r6\n\t" + "UMAAL r11, r9, r1, r6\n\t" "STR r10, [sp, #24]\n\t" - "UMAAL r12, r9, %[b], r6\n\t" + "UMAAL r12, r9, r2, r6\n\t" "UMAAL r4, r9, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r10, #0x0\n\t" - "UMLAL r11, r10, %[r], r6\n\t" - "UMAAL r12, r10, %[a], r6\n\t" + "UMLAL r11, r10, r0, r6\n\t" + "UMAAL r12, r10, r1, r6\n\t" "STR r11, [sp, #28]\n\t" - "UMAAL r4, r10, %[b], r6\n\t" + "UMAAL r4, r10, r2, r6\n\t" "UMAAL r5, r10, r3, r6\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r12, r7, %[r], r11\n\t" - "UMAAL r4, r7, %[a], r11\n\t" + "UMAAL r12, r7, r0, r11\n\t" + "UMAAL r4, r7, r1, r11\n\t" "LDR r6, [sp, #32]\n\t" - "UMAAL r5, r7, %[b], r11\n\t" + "UMAAL r5, r7, r2, r11\n\t" "UMAAL r6, r7, r3, r11\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r4, r8, %[r], r11\n\t" - "UMAAL r5, r8, %[a], r11\n\t" - "UMAAL r6, r8, %[b], r11\n\t" + "UMAAL r4, r8, r0, r11\n\t" + "UMAAL r5, r8, r1, r11\n\t" + "UMAAL r6, r8, r2, r11\n\t" "UMAAL r7, r8, r3, r11\n\t" "LDM lr, {r11, lr}\n\t" - "UMAAL r5, r9, %[r], r11\n\t" - "UMAAL r6, r10, %[r], lr\n\t" - "UMAAL r6, r9, %[a], r11\n\t" - "UMAAL r7, r10, %[a], lr\n\t" - "UMAAL r7, r9, %[b], r11\n\t" - "UMAAL r8, r10, %[b], lr\n\t" + "UMAAL r5, r9, r0, r11\n\t" + "UMAAL r6, r10, r0, lr\n\t" + "UMAAL r6, r9, r1, r11\n\t" + "UMAAL r7, r10, r1, lr\n\t" + "UMAAL r7, r9, r2, r11\n\t" + "UMAAL r8, r10, r2, lr\n\t" "UMAAL r8, r9, r3, r11\n\t" "UMAAL r9, r10, r3, lr\n\t" "MOV r3, r12\n\t" @@ -725,11 +748,17 @@ static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -760,10 +789,16 @@ static sp_digit sp_2048_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_16(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -808,11 +843,17 @@ static sp_digit sp_2048_sub_in_place_16(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -922,10 +963,16 @@ SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -998,11 +1045,17 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -1144,10 +1197,16 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -1276,11 +1335,17 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -1474,15 +1539,24 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, } #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -1717,62 +1791,71 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x20\n\t" "STR %[r], [sp, #28]\n\t" - "LDM %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" - "UMULL r9, r10, %[r], %[r]\n\t" - "UMULL r11, r12, %[r], %[a]\n\t" + "LDM %[a], {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" + "UMULL r9, r10, r0, r0\n\t" + "UMULL r11, r12, r0, r1\n\t" "ADDS r11, r11, r11\n\t" "MOV lr, #0x0\n\t" "UMAAL r10, r11, lr, lr\n\t" "STM sp, {r9, r10}\n\t" "MOV r8, lr\n\t" - "UMAAL r8, r12, %[r], r2\n\t" + "UMAAL r8, r12, r0, r2\n\t" "ADCS r8, r8, r8\n\t" - "UMAAL r8, r11, %[a], %[a]\n\t" - "UMULL r9, r10, %[r], r3\n\t" - "UMAAL r9, r12, %[a], r2\n\t" + "UMAAL r8, r11, r1, r1\n\t" + "UMULL r9, r10, r0, r3\n\t" + "UMAAL r9, r12, r1, r2\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STRD r8, r9, [sp, #8]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r10, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r3\n\t" + "UMAAL r9, r10, r0, r4\n\t" + "UMAAL r9, r12, r1, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r2, r2\n\t" "STR r9, [sp, #16]\n\t" - "UMULL r9, r8, %[r], r5\n\t" - "UMAAL r9, r12, %[a], r4\n\t" + "UMULL r9, r8, r0, r5\n\t" + "UMAAL r9, r12, r1, r4\n\t" "UMAAL r9, r10, r2, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STR r9, [sp, #20]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r8, %[r], r6\n\t" - "UMAAL r9, r12, %[a], r5\n\t" + "UMAAL r9, r8, r0, r6\n\t" + "UMAAL r9, r12, r1, r5\n\t" "UMAAL r9, r10, r2, r4\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r3, r3\n\t" "STR r9, [sp, #24]\n\t" - "UMULL %[r], r9, %[r], r7\n\t" - "UMAAL %[r], r8, %[a], r6\n\t" - "UMAAL %[r], r12, r2, r5\n\t" - "UMAAL %[r], r10, r3, r4\n\t" - "ADCS %[r], %[r], %[r]\n\t" - "UMAAL %[r], r11, lr, lr\n\t" + "UMULL r0, r9, r0, r7\n\t" + "UMAAL r0, r8, r1, r6\n\t" + "UMAAL r0, r12, r2, r5\n\t" + "UMAAL r0, r10, r3, r4\n\t" + "ADCS r0, r0, r0\n\t" + "UMAAL r0, r11, lr, lr\n\t" /* R[7] = r0 */ - "UMAAL r9, r8, %[a], r7\n\t" + "UMAAL r9, r8, r1, r7\n\t" "UMAAL r9, r10, r2, r6\n\t" "UMAAL r12, r9, r3, r5\n\t" "ADCS r12, r12, r12\n\t" @@ -1790,10 +1873,10 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) "ADCS r3, r2, r2\n\t" "UMAAL r10, r3, r5, r5\n\t" /* R[10] = r10 */ - "MOV %[a], lr\n\t" - "UMAAL %[a], r8, 
r4, r7\n\t" - "UMAAL %[a], r9, r5, r6\n\t" - "ADCS r4, %[a], %[a]\n\t" + "MOV r1, lr\n\t" + "UMAAL r1, r8, r4, r7\n\t" + "UMAAL r1, r9, r5, r6\n\t" + "ADCS r4, r1, r1\n\t" "UMAAL r3, r4, lr, lr\n\t" /* R[11] = r3 */ "UMAAL r8, r9, r5, r7\n\t" @@ -1812,14 +1895,14 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) /* R[15] = r7 */ "LDR lr, [sp, #28]\n\t" "ADD lr, lr, #0x1c\n\t" - "STM lr!, {%[r], r12}\n\t" + "STM lr!, {r0, r12}\n\t" "STM lr!, {r11}\n\t" "STM lr!, {r10}\n\t" "STM lr!, {r3, r4, r8, r9}\n\t" "STM lr!, {r7}\n\t" "SUB lr, lr, #0x40\n\t" - "LDM sp, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" - "STM lr, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" + "LDM sp, {r0, r1, r2, r3, r4, r5, r6}\n\t" + "STM lr, {r0, r1, r2, r3, r4, r5, r6}\n\t" "ADD sp, sp, #0x20\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -1834,11 +1917,17 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -1905,11 +1994,17 @@ SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -1990,11 +2085,17 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
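 *
 * As with the adds, the SUBS/SBCS chains are word-by-word subtraction
 * with borrow; in illustrative C (hypothetical helper, not part of this
 * patch):
 *
 *     static sp_digit sub_words(sp_digit* r, const sp_digit* a,
 *                               const sp_digit* b, int n)
 *     {
 *         sp_digit bw = 0;                     // borrow: 0 or 1
 *         int i;
 *         for (i = 0; i < n; i++) {
 *             sp_digit d = a[i] - b[i] - bw;
 *             bw = (sp_digit)((d > a[i]) | ((d == a[i]) & (bw != 0)));
 *             r[i] = d;
 *         }
 *         return bw;                           // nonzero on borrow
 *     }
 *
 * (The asm variants return the borrow as 0/1 or as an all-ones word,
 * depending on the routine.)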
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -2105,18 +2206,24 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x100\n\t" "\n" "L_sp_2048_add_64_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -2147,10 +2254,16 @@ static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -2188,11 +2301,17 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. 
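 *
 * (A note on the looped WOLFSSL_SP_SMALL add variants above: the carry
 * cannot stay in the flags across the loop bookkeeping, so it is parked
 * in r3 as 0 or 1 and turned back into the carry flag at the top of
 * each iteration by
 *
 *     ADDS r3, r3, #0xffffffff
 *
 * since r3 + 0xffffffff carries out exactly when r3 was 1. The
 * immediate was previously written as the negative literal #0x-1, which
 * GCC tolerated but IAR rejects; this patch switches every such
 * occurrence to the equivalent positive spelling.)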
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x200\n\t" @@ -2263,10 +2382,16 @@ static void sp_2048_mul_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_sqr_64(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x200\n\t" @@ -2383,18 +2508,24 @@ static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x80\n\t" "\n" "L_sp_2048_add_32_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -2425,10 +2556,16 @@ static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -2466,11 +2603,17 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" @@ -2541,10 +2684,16 @@ static void sp_2048_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_sqr_32(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" @@ -2665,11 +2814,17 @@ static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -2712,11 +2867,17 @@ static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -3070,12 +3231,18 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. 
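 *
 * The mask m is expected to be all-zero or all-one bits, so the
 * subtrahend can be masked in without a branch and the routine runs in
 * constant time. A C sketch of the idea (illustrative only):
 *
 *     sp_digit bw = 0;                         // borrow
 *     int i;
 *     for (i = 0; i < 32; i++) {
 *         sp_digit t = b[i] & m;               // 0 or b[i]
 *         sp_digit d = a[i] - t - bw;
 *         bw = (sp_digit)((d > a[i]) | ((d == a[i]) & (bw != 0)));
 *         r[i] = d;
 *     }
 *     // the final borrow is handed back to the caller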
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -3114,12 +3281,18 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -3245,17 +3418,24 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp #endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_SP_SMALL /* Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -3550,11 +3730,125 @@ static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
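 *
 * Every variant below implements the textbook word-by-word Montgomery
 * reduction; in illustrative C (the unrolled assembly processes four
 * words of m per pass of the inner loop):
 *
 *     sp_digit mu, c;
 *     int i, j;
 *     for (i = 0; i < 32; i++) {
 *         mu = a[i] * mp;                      // truncating 32-bit mul
 *         c = 0;
 *         for (j = 0; j < 32; j++) {
 *             uint64_t t = (uint64_t)mu * m[j] + a[i + j] + c;
 *             a[i + j] = (sp_digit)t;          // low word; a[i] -> 0
 *             c = (sp_digit)(t >> 32);         // high word rides along
 *         }
 *         // fold c plus the running top carry into a[i + 32]
 *     }
 *
 * Afterwards the low 32 words are zero and the result sits in the upper
 * half, so the trailing sp_2048_cond_sub_32(a - 32, a, m, ...) call
 * (the asm has already advanced a by 32 words at that point) performs
 * the final conditional subtraction of m and writes the result back to
 * the start of a.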
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_32_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_32_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x80\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_32_mul_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_32_mul_%=\n\t" +#endif + "LDR r10, [%[a], #128]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #128]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x80\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_32_word_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_32_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#else +#ifndef WOLFSSL_SP_SMALL +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
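+ *
+ * The UMAAL forms on this side rely on that instruction's
+ * two-accumulator semantics:
+ *
+ *     UMAAL rLo, rHi, rA, rB   =>   {rHi:rLo} = rA*rB + rLo + rHi
+ *
+ * Since (2^32-1)^2 + 2*(2^32-1) = 2^64 - 1, the whole
+ * a[i+j] + mu*m[j] + carry step fits in one instruction with no carry
+ * ever lost; the new UMLAL-based small loop above has to spend an
+ * extra MOV/ADDS/ADC per word to achieve the same thing.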
+ */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -3752,6 +4046,101 @@ static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); } +#else +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_32_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_32_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x80\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_32_mul_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_32_mul_%=\n\t" +#endif + "LDR r10, [%[a], #128]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #128]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x80\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_32_word_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_32_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ #endif /* Multiply two Montgomery form 
numbers mod the modulus (prime). * (r = a * b mod m) @@ -3790,11 +4179,17 @@ SP_NOINLINE static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -3837,11 +4232,17 @@ static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -4021,11 +4422,17 @@ static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -4080,11 +4487,17 @@ static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
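 *
 * What the routine approximates, in plain C (these cores have no
 * 64-by-32-bit hardware divide):
 *
 *     static sp_digit div_word_ref(sp_digit d1, sp_digit d0, sp_digit div)
 *     {
 *         uint64_t d = ((uint64_t)d1 << 32) | d0;
 *         return (sp_digit)(d / div);          // exact quotient
 *     }
 *
 * The assembly instead builds the quotient from estimates against the
 * top bits of div, which can overshoot by one; the surrounding division
 * code tolerates that, as the comment above says.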
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -4144,16 +4557,22 @@ static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_2048_cmp_32(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x7c\n\t" "\n" @@ -4940,12 +5359,18 @@ static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -4984,12 +5409,18 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. 
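 *
 * (A note on sp_2048_cmp_32 above: the words are scanned from most
 * significant down using masks instead of early exits, so the running
 * time does not depend on where the operands first differ. In
 * illustrative C:
 *
 *     sp_int32 res = 0;
 *     sp_digit mask = (sp_digit)-1;            // all ones until decided
 *     int i;
 *     for (i = 31; i >= 0; i--) {
 *         sp_digit gt = (sp_digit)0 - (sp_digit)(a[i] > b[i]);
 *         sp_digit lt = (sp_digit)0 - (sp_digit)(a[i] < b[i]);
 *         res += (sp_int32)(mask & gt & 1);    // first difference is >
 *         res -= (sp_int32)(mask & lt & 1);    // first difference is <
 *         mask &= ~(gt | lt);                  // lock in the decision
 *     }
 *     return res;                              // -1, 0 or 1
 *
 * The same masking idea is behind the 0xffffffff constants loaded at
 * the top of that assembly, another spot where IAR needed the positive
 * spelling of -1.)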
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -5227,17 +5658,24 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp #endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_SP_SMALL /* Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -5788,11 +6226,125 @@ static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_64_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_64_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x100\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_64_mul_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_64_mul_%=\n\t" +#endif + "LDR r10, [%[a], #256]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #256]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x100\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_64_word_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_64_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#else +#ifndef WOLFSSL_SP_SMALL +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -6150,6 +6702,101 @@ static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); } +#else +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_64_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_64_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x100\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_64_mul_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_64_mul_%=\n\t" +#endif + "LDR r10, [%[a], #256]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #256]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x100\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_64_word_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_64_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ #endif /* Multiply two Montgomery 
form numbers mod the modulus (prime). * (r = a * b mod m) @@ -6188,11 +6835,17 @@ SP_NOINLINE static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -6229,11 +6882,17 @@ static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -6367,11 +7026,17 @@ static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digi * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -6426,11 +7091,17 @@ static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -6593,16 +7264,22 @@ static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_2048_cmp_64(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0xfc\n\t" "\n" @@ -7864,12 +8541,18 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -7877,7 +8560,7 @@ static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp "MOV r4, #0x0\n\t" "\n" "L_sp_2048_cond_add_32_words_%=:\n\t" - "ADDS r5, r5, #0x-1\n\t" + "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" "AND r7, r7, %[m]\n\t" @@ -7908,12 +8591,18 @@ static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. 
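 *
 * The additive counterpart of cond_sub: b is masked with m and added
 * with carry, so that, e.g., a modulus can be added back exactly when a
 * preceding subtraction borrowed, without branching on secret data.
 * Sketch (illustrative only):
 *
 *     sp_digit c = 0;                          // carry
 *     int i;
 *     for (i = 0; i < 32; i++) {
 *         sp_digit t = b[i] & m;               // 0 or b[i]
 *         sp_digit s = a[i] + t + c;
 *         c = (sp_digit)((s < a[i]) | ((s == a[i]) & (c != 0)));
 *         r[i] = s;
 *     }
 *     // c is the carry out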
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -8352,11 +9041,17 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_2048 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_lshift_64(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_2048_lshift_64(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x1f\n\t" @@ -9166,11 +9861,17 @@ static void sp_3072_to_bin_96(sp_digit* r, byte* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x30\n\t" @@ -10187,11 +10888,17 @@ static void sp_3072_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * a A single precision integer. * b A single precision integer. 
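 *
 * On sp_3072_mul_12 above: it is, broadly, a column-oriented
 * (product-scanning) schoolbook multiply, accumulating every a[i]*b[j]
 * with i + j == k into output column k. An illustrative C model, not
 * the exact register schedule:
 *
 *     sp_digit t[24];
 *     uint64_t acc = 0;                        // low 64 column bits
 *     sp_digit hi = 0;                         // third accumulator word
 *     int i, k;
 *     for (k = 0; k < 23; k++) {
 *         int last = (k < 12) ? k : 11;
 *         for (i = (k < 12) ? 0 : (k - 11); i <= last; i++) {
 *             uint64_t p = (uint64_t)a[i] * b[k - i];
 *             acc += p;
 *             hi += (sp_digit)(acc < p);       // carry out of 64 bits
 *         }
 *         t[k] = (sp_digit)acc;                // emit the column
 *         acc = (acc >> 32) | ((uint64_t)hi << 32);
 *         hi = 0;
 *     }
 *     t[23] = (sp_digit)acc;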
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -10229,10 +10936,16 @@ static sp_digit sp_3072_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_in_place_24(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -10291,11 +11004,17 @@ static sp_digit sp_3072_sub_in_place_24(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_24(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -10423,10 +11142,16 @@ SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -10527,11 +11252,17 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. 
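 *
 * These fixed-size add and sub_in_place helpers are the glue for the
 * Karatsuba-style doubling that appears to build the larger multiplies
 * here (mul_24 from mul_12, mul_48 from mul_24, and so on).
 * Schematically, splitting a = a1*2^(32n) + a0 and b likewise:
 *
 *     z0 = a0 * b0
 *     z2 = a1 * b1
 *     z1 = (a0 + a1) * (b0 + b1) - z0 - z2
 *     r  = z2*2^(64n) + z1*2^(32n) + z0
 *
 * with the (a0 + a1) sums coming from the add_n routines and the
 * -z0 - z2 corrections from the sub_in_place_n routines (carry
 * handling omitted from this sketch).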
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -10701,10 +11432,16 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -10889,11 +11626,17 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -11147,10 +11890,16 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_sqr_12(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x30\n\t" @@ -11842,11 +12591,17 @@ static void sp_3072_sqr_12(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. 
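 *
 * On sp_3072_sqr_12 above: a square needs each cross product only once,
 * since a[i]*a[j] = a[j]*a[i], so output column k is built as
 *
 *     2 * (sum of a[i]*a[j] over i < j, i + j == k)
 *       + (a[k/2]*a[k/2] when k is even)
 *
 * which is what the doubling ADCS chains and the separately accumulated
 * diagonal terms implement, roughly halving the multiplication count
 * relative to sp_3072_mul_12.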
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -11920,11 +12675,17 @@ SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_24(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_24(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -12019,11 +12780,17 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -12162,18 +12929,24 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x180\n\t" "\n" "L_sp_3072_add_96_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -12204,10 +12977,16 @@ static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -12245,11 +13024,17 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x300\n\t" @@ -12320,10 +13105,16 @@ static void sp_3072_mul_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_sqr_96(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x300\n\t" @@ -12440,18 +13231,24 @@ static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0xc0\n\t" "\n" "L_sp_3072_add_48_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -12482,10 +13279,16 @@ static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -12523,11 +13326,17 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x180\n\t" @@ -12598,10 +13407,16 @@ static void sp_3072_mul_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_sqr_48(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x180\n\t" @@ -12722,11 +13537,17 @@ static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho) * a A single precision integer. * b A single precision digit. 
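+ *
+ * A rough C model of multiplying a 96-word value by one digit (sketch
+ * only, not the generated code; assumes 32-bit sp_digit):
+ *
+ *     uint64_t t = 0;
+ *     for (i = 0; i < 96; i++) {
+ *         t += (uint64_t)a[i] * b;
+ *         r[i] = (sp_digit)t;
+ *         t >>= 32;
+ *     }
+ *     r[96] = (sp_digit)t;   the result is 97 words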
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -12769,11 +13590,17 @@ static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -13287,12 +14114,18 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -13331,12 +14164,18 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. 
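+ *
+ * m is expected to be either all zero bits or all one bits, so b is
+ * subtracted in full or not at all with no data-dependent branching.
+ * A rough C model (sketch only, not the generated code; assumes 32-bit
+ * sp_digit):
+ *
+ *     uint64_t t = 0;
+ *     for (i = 0; i < 48; i++) {
+ *         t = (uint64_t)a[i] - (b[i] & m) - ((t >> 32) & 1);
+ *         r[i] = (sp_digit)t;
+ *     }
+ *     return (sp_digit)(t >> 32);   final borrow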
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -13518,17 +14357,24 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp #endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_SP_SMALL /* Reduce the number back to 3072 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -13951,11 +14797,125 @@ static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
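+ *
+ * Word-level shape of the reduction below (rough sketch only, not the
+ * generated code; assumes 32-bit sp_digit and uint64_t intermediates):
+ *
+ *     for (i = 0; i < 48; i++) {
+ *         sp_digit mu = a[i] * mp;   truncated to 32 bits
+ *         sp_digit c = 0;
+ *         for (j = 0; j < 48; j++) {
+ *             uint64_t t = (uint64_t)mu * m[j] + a[i + j] + c;
+ *             a[i + j] = (sp_digit)t;
+ *             c = (sp_digit)(t >> 32);
+ *         }
+ *         c and the running carry are then folded into a[i + 48];
+ *     }
+ *
+ * The trailing sp_3072_cond_sub_48 call turns the final carry (0 or 1)
+ * into a 0/all-ones mask via (sp_digit)0 - mp and subtracts m at most
+ * once.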
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_48_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_48_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0xc0\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_48_mul_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_48_mul_%=\n\t" +#endif + "LDR r10, [%[a], #192]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #192]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0xc0\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_48_word_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_48_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#else +#ifndef WOLFSSL_SP_SMALL +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
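+ *
+ * This path relies on the UMAAL instruction, which computes a
+ * 32x32->64 multiply plus two 32-bit additions in one step and cannot
+ * overflow 64 bits:
+ *
+ *     {RdHi:RdLo} = Rn * Rm + RdLo + RdHi
+ *
+ * so each inner-loop word needs a single UMAAL where the
+ * WOLFSSL_SP_NO_UMAAL build above uses a UMLAL/ADDS/ADC sequence.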
+ */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -14233,6 +15193,101 @@ static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp); } +#else +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_48_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_48_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0xc0\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_48_mul_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_48_mul_%=\n\t" +#endif + "LDR r10, [%[a], #192]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #192]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0xc0\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_48_word_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_48_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ #endif /* Multiply two Montgomery 
form numbers mod the modulus (prime). * (r = a * b mod m) @@ -14271,11 +15326,17 @@ SP_NOINLINE static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -14318,11 +15379,17 @@ static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -14582,11 +15649,17 @@ static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -14641,11 +15714,17 @@ static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
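+ *
+ * In C terms the intended contract is roughly (sketch only; assumes
+ * the usual precondition d1 < div so the quotient fits in 32 bits):
+ *
+ *     uint64_t d = ((uint64_t)d1 << 32) | d0;
+ *     return (sp_digit)(d / div);   may be one too large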
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -14705,16 +15784,22 @@ static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_3072_cmp_48(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0xbc\n\t" "\n" @@ -15677,12 +16762,18 @@ static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -15721,12 +16812,18 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -16076,17 +17173,24 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp #endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_SP_SMALL /* Reduce the number back to 3072 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -16893,11 +17997,125 @@ static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_96_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_96_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x180\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_96_mul_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_96_mul_%=\n\t" +#endif + "LDR r10, [%[a], #384]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #384]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x180\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_96_word_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_96_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#else +#ifndef WOLFSSL_SP_SMALL +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -17415,6 +18633,101 @@ static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp); } +#else +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_96_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_96_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x180\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_96_mul_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_96_mul_%=\n\t" +#endif + "LDR r10, [%[a], #384]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #384]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x180\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_96_word_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_96_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ #endif /* Multiply two Montgomery 
form numbers mod the modulus (prime). * (r = a * b mod m) @@ -17453,11 +18766,17 @@ SP_NOINLINE static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -17494,11 +18813,17 @@ static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -17688,11 +19013,17 @@ static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digi * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -17747,11 +19078,17 @@ static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -17914,16 +19251,22 @@ static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_3072_cmp_96(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x17c\n\t" "\n" @@ -19537,12 +20880,18 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -19550,7 +20899,7 @@ static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp "MOV r4, #0x0\n\t" "\n" "L_sp_3072_cond_add_48_words_%=:\n\t" - "ADDS r5, r5, #0x-1\n\t" + "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" "AND r7, r7, %[m]\n\t" @@ -19581,12 +20930,18 @@ static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. 
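+ *
+ * The mirror of the conditional subtract: the mask m selects between
+ * adding b and adding zero without branching. A rough C model (sketch
+ * only, not the generated code; assumes 32-bit sp_digit):
+ *
+ *     uint64_t t = 0;
+ *     for (i = 0; i < 48; i++) {
+ *         t = (uint64_t)a[i] + (b[i] & m) + (t >> 32);
+ *         r[i] = (sp_digit)t;
+ *     }
+ *     return (sp_digit)(t >> 32);   carry out: 0 or 1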
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -20081,11 +21436,17 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_3072 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_lshift_96(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_3072_lshift_96(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x1f\n\t" @@ -21086,10 +22447,16 @@ static void sp_4096_to_bin_128(sp_digit* r, byte* a) * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -21330,11 +22697,17 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -21653,18 +23026,24 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) * a A single precision integer. 
* b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x200\n\t" "\n" "L_sp_4096_add_128_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -21695,10 +23074,16 @@ static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_dig * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -21736,11 +23121,17 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_mul_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x400\n\t" @@ -21811,10 +23202,16 @@ static void sp_4096_mul_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* * r A single precision integer. * a A single precision integer. 
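+ *
+ * One standard way to square a 128-word value with three half-size
+ * squares (Karatsuba-style identity; math sketch only, the exact
+ * splitting performed by the code below is not spelled out here):
+ *
+ *     a = a1*B + a0, with B = 2^2048
+ *     a^2 = a1^2*B^2 + (a0^2 + a1^2 - (a0 - a1)^2)*B + a0^2
+ *
+ * which replaces the doubled cross product 2*a0*a1 with squarings and
+ * a subtraction.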
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_sqr_128(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x400\n\t" @@ -21933,11 +23330,17 @@ static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -21980,11 +23383,17 @@ static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -22659,12 +24068,18 @@ static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -22703,12 +24118,18 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const s * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -23170,17 +24591,24 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const s #endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_SP_SMALL /* Reduce the number back to 4096 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -24243,11 +25671,125 @@ static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_4096_mont_reduce_128_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_4096_mont_reduce_128_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x200\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_4096_mont_reduce_128_mul_%=\n\t" +#else + "BLT.N L_sp_4096_mont_reduce_128_mul_%=\n\t" +#endif + "LDR r10, [%[a], #512]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #512]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x200\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_4096_mont_reduce_128_word_%=\n\t" +#else + "BLT.N L_sp_4096_mont_reduce_128_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#else +#ifndef WOLFSSL_SP_SMALL +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -24925,6 +26467,101 @@ static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp); } +#else +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_4096_mont_reduce_128_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_4096_mont_reduce_128_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x200\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_4096_mont_reduce_128_mul_%=\n\t" +#else + "BLT.N L_sp_4096_mont_reduce_128_mul_%=\n\t" +#endif + "LDR r10, [%[a], #512]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #512]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x200\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_4096_mont_reduce_128_word_%=\n\t" +#else + "BLT.N L_sp_4096_mont_reduce_128_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ #endif /* Multiply 
two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) @@ -24963,11 +26600,17 @@ SP_NOINLINE static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -25004,11 +26647,17 @@ static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_dig * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -25254,11 +26903,17 @@ static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_dig * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -25313,11 +26968,17 @@ static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
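/* A sketch of the contract stated above: the word division may return the
 * true quotient or one more, and the caller fixes it up with one multiply
 * and compare. Helper name is illustrative; requires d1 < div so the
 * quotient fits in 32 bits. */
#include <stdint.h>

static uint32_t div_word_fixup(uint32_t d1, uint32_t d0, uint32_t div,
                               uint32_t q)      /* q from the approx div */
{
    uint64_t d = ((uint64_t)d1 << 32) | d0;
    if ((uint64_t)q * div > d)                  /* q was one too large */
        q--;
    return q;
}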
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -25480,16 +27141,22 @@ static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_4096_cmp_128(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x1fc\n\t" "\n" @@ -27455,12 +29122,18 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -27468,7 +29141,7 @@ static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp "MOV r4, #0x0\n\t" "\n" "L_sp_4096_cond_add_64_words_%=:\n\t" - "ADDS r5, r5, #0x-1\n\t" + "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" "AND r7, r7, %[m]\n\t" @@ -27499,12 +29172,18 @@ static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. 
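/* A sketch of the constant-time compare the cmp routines above implement:
 * every limb is visited from the top, a mask freezes the result at the
 * first difference, and the all-ones constant (now written #0xffffffff,
 * since IAR rejects the #0x-1 spelling) supplies the -1. Portable
 * illustration only; the asm avoids C comparison operators entirely. */
#include <stdint.h>

static int32_t cmp_ct(const uint32_t* a, const uint32_t* b, int n)
{
    int32_t r = 0;
    uint32_t mask = (uint32_t)-1;      /* all-ones until limbs differ */
    int i;
    for (i = n - 1; i >= 0; i--) {
        uint32_t gt = (uint32_t)0 - (uint32_t)(a[i] > b[i]);
        uint32_t lt = (uint32_t)0 - (uint32_t)(a[i] < b[i]);
        r += (int32_t)(mask & gt & 1);
        r -= (int32_t)(mask & lt & 1);
        mask &= ~(gt | lt);            /* freeze after the first difference */
    }
    return r;                          /* -1, 0 or +1 */
}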
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -28055,11 +29734,17 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_4096 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_lshift_128(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_4096_lshift_128(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x1f\n\t" @@ -29126,11 +30811,17 @@ static const sp_digit p256_b[8] = { * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x40\n\t" @@ -29198,17 +30889,26 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p #else #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. 
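/* The pattern used throughout this file, in miniature: GCC/Clang pin the
 * renamed parameters to the registers the asm expects, while IAR, which
 * neither parses register-variable bindings nor honours 'register', keeps
 * the original names and lets the operand constraints pick registers. The
 * pragma stops IAR inlining these functions, presumably because inlining
 * would invalidate the fixed-register setup. Hypothetical function, for
 * illustration only. */
#include <stdint.h>

#ifdef __IAR_SYSTEMS_ICC__
#pragma inline=never
#endif /* __IAR_SYSTEMS_ICC__ */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
static void store_zero(uint32_t* p_p)
#else
static void store_zero(uint32_t* p)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register uint32_t* p __asm__ ("r0") = (uint32_t*)p_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
    __asm__ __volatile__ (
        "MOV r2, #0x0\n\t"
        "STR r2, [%[p]]\n\t"        /* *p = 0 */
        : [p] "+r" (p)
        :
        : "memory", "r2"
    );
}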
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x24\n\t" @@ -29551,115 +31251,129 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "STRD %[r], %[a], [sp, #36]\n\t" +#else + "STR %[r], [sp, #36]\n\t" + "STR %[a], [sp, #40]\n\t" +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[b]\n\t" - "LDM %[a], {%[r], %[a], %[b], r3}\n\t" + "LDM %[a], {r0, r1, r2, r3}\n\t" "LDM lr!, {r4, r5, r6}\n\t" - "UMULL r10, r11, %[r], r4\n\t" - "UMULL r12, r7, %[a], r4\n\t" - "UMAAL r11, r12, %[r], r5\n\t" - "UMULL r8, r9, %[b], r4\n\t" - "UMAAL r12, r8, %[a], r5\n\t" - "UMAAL r12, r7, %[r], r6\n\t" + "UMULL r10, r11, r0, r4\n\t" + "UMULL r12, r7, r1, r4\n\t" + "UMAAL r11, r12, r0, r5\n\t" + "UMULL r8, r9, r2, r4\n\t" + "UMAAL r12, r8, r1, r5\n\t" + "UMAAL r12, r7, r0, r6\n\t" "UMAAL r8, r9, r3, r4\n\t" "STM sp, {r10, r11, r12}\n\t" - "UMAAL r7, r8, %[b], r5\n\t" + "UMAAL r7, r8, r2, r5\n\t" "LDM lr!, {r4}\n\t" - "UMULL r10, r11, %[a], r6\n\t" - "UMAAL r8, r9, %[b], r6\n\t" - "UMAAL r7, r10, %[r], r4\n\t" + "UMULL r10, r11, r1, r6\n\t" + "UMAAL r8, r9, r2, r6\n\t" + "UMAAL r7, r10, r0, r4\n\t" "UMAAL r8, r11, r3, r5\n\t" "STR r7, [sp, #12]\n\t" - "UMAAL r8, r10, %[a], r4\n\t" + "UMAAL r8, r10, r1, r4\n\t" "UMAAL r9, r11, r3, r6\n\t" - "UMAAL r9, r10, %[b], r4\n\t" + "UMAAL r9, r10, r2, r4\n\t" "UMAAL r10, r11, r3, r4\n\t" "LDM lr, {r4, r5, r6, r7}\n\t" "MOV r12, #0x0\n\t" - "UMLAL r8, r12, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r4\n\t" - "UMAAL r10, r12, %[b], r4\n\t" + "UMLAL r8, r12, r0, r4\n\t" + "UMAAL r9, r12, r1, r4\n\t" + "UMAAL r10, r12, r2, r4\n\t" "UMAAL r11, r12, r3, r4\n\t" "MOV r4, #0x0\n\t" - "UMLAL r9, r4, %[r], r5\n\t" - "UMAAL r10, r4, %[a], r5\n\t" - "UMAAL r11, r4, %[b], r5\n\t" + "UMLAL r9, r4, r0, r5\n\t" + "UMAAL r10, r4, r1, r5\n\t" + "UMAAL r11, r4, r2, r5\n\t" "UMAAL r12, r4, r3, r5\n\t" "MOV r5, 
#0x0\n\t" - "UMLAL r10, r5, %[r], r6\n\t" - "UMAAL r11, r5, %[a], r6\n\t" - "UMAAL r12, r5, %[b], r6\n\t" + "UMLAL r10, r5, r0, r6\n\t" + "UMAAL r11, r5, r1, r6\n\t" + "UMAAL r12, r5, r2, r6\n\t" "UMAAL r4, r5, r3, r6\n\t" "MOV r6, #0x0\n\t" - "UMLAL r11, r6, %[r], r7\n\t" - "LDR %[r], [sp, #40]\n\t" - "UMAAL r12, r6, %[a], r7\n\t" - "ADD %[r], %[r], #0x10\n\t" - "UMAAL r4, r6, %[b], r7\n\t" + "UMLAL r11, r6, r0, r7\n\t" + "LDR r0, [sp, #40]\n\t" + "UMAAL r12, r6, r1, r7\n\t" + "ADD r0, r0, #0x10\n\t" + "UMAAL r4, r6, r2, r7\n\t" "SUB lr, lr, #0x10\n\t" "UMAAL r5, r6, r3, r7\n\t" - "LDM %[r], {%[r], %[a], %[b], r3}\n\t" + "LDM r0, {r0, r1, r2, r3}\n\t" "STR r6, [sp, #32]\n\t" "LDM lr!, {r6}\n\t" "MOV r7, #0x0\n\t" - "UMLAL r8, r7, %[r], r6\n\t" - "UMAAL r9, r7, %[a], r6\n\t" + "UMLAL r8, r7, r0, r6\n\t" + "UMAAL r9, r7, r1, r6\n\t" "STR r8, [sp, #16]\n\t" - "UMAAL r10, r7, %[b], r6\n\t" + "UMAAL r10, r7, r2, r6\n\t" "UMAAL r11, r7, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r8, #0x0\n\t" - "UMLAL r9, r8, %[r], r6\n\t" - "UMAAL r10, r8, %[a], r6\n\t" + "UMLAL r9, r8, r0, r6\n\t" + "UMAAL r10, r8, r1, r6\n\t" "STR r9, [sp, #20]\n\t" - "UMAAL r11, r8, %[b], r6\n\t" + "UMAAL r11, r8, r2, r6\n\t" "UMAAL r12, r8, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r9, #0x0\n\t" - "UMLAL r10, r9, %[r], r6\n\t" - "UMAAL r11, r9, %[a], r6\n\t" + "UMLAL r10, r9, r0, r6\n\t" + "UMAAL r11, r9, r1, r6\n\t" "STR r10, [sp, #24]\n\t" - "UMAAL r12, r9, %[b], r6\n\t" + "UMAAL r12, r9, r2, r6\n\t" "UMAAL r4, r9, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r10, #0x0\n\t" - "UMLAL r11, r10, %[r], r6\n\t" - "UMAAL r12, r10, %[a], r6\n\t" + "UMLAL r11, r10, r0, r6\n\t" + "UMAAL r12, r10, r1, r6\n\t" "STR r11, [sp, #28]\n\t" - "UMAAL r4, r10, %[b], r6\n\t" + "UMAAL r4, r10, r2, r6\n\t" "UMAAL r5, r10, r3, r6\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r12, r7, %[r], r11\n\t" - "UMAAL r4, r7, %[a], r11\n\t" + "UMAAL r12, r7, r0, r11\n\t" + "UMAAL r4, r7, r1, r11\n\t" "LDR r6, [sp, #32]\n\t" - "UMAAL r5, r7, %[b], r11\n\t" + "UMAAL r5, r7, r2, r11\n\t" "UMAAL r6, r7, r3, r11\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r4, r8, %[r], r11\n\t" - "UMAAL r5, r8, %[a], r11\n\t" - "UMAAL r6, r8, %[b], r11\n\t" + "UMAAL r4, r8, r0, r11\n\t" + "UMAAL r5, r8, r1, r11\n\t" + "UMAAL r6, r8, r2, r11\n\t" "UMAAL r7, r8, r3, r11\n\t" "LDM lr, {r11, lr}\n\t" - "UMAAL r5, r9, %[r], r11\n\t" - "UMAAL r6, r10, %[r], lr\n\t" - "UMAAL r6, r9, %[a], r11\n\t" - "UMAAL r7, r10, %[a], lr\n\t" - "UMAAL r7, r9, %[b], r11\n\t" - "UMAAL r8, r10, %[b], lr\n\t" + "UMAAL r5, r9, r0, r11\n\t" + "UMAAL r6, r10, r0, lr\n\t" + "UMAAL r6, r9, r1, r11\n\t" + "UMAAL r7, r10, r1, lr\n\t" + "UMAAL r7, r9, r2, r11\n\t" + "UMAAL r8, r10, r2, lr\n\t" "UMAAL r8, r9, r3, r11\n\t" "UMAAL r9, r10, r3, lr\n\t" "MOV r3, r12\n\t" @@ -29684,10 +31398,16 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p * r A single precision integer. * a A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x40\n\t" @@ -29780,15 +31500,24 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) #else #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -30023,62 +31752,71 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x20\n\t" "STR %[r], [sp, #28]\n\t" - "LDM %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" - "UMULL r9, r10, %[r], %[r]\n\t" - "UMULL r11, r12, %[r], %[a]\n\t" + "LDM %[a], {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" + "UMULL r9, r10, r0, r0\n\t" + "UMULL r11, r12, r0, r1\n\t" "ADDS r11, r11, r11\n\t" "MOV lr, #0x0\n\t" "UMAAL r10, r11, lr, lr\n\t" "STM sp, {r9, r10}\n\t" "MOV r8, lr\n\t" - "UMAAL r8, r12, %[r], r2\n\t" + "UMAAL r8, r12, r0, r2\n\t" "ADCS r8, r8, r8\n\t" - "UMAAL r8, r11, %[a], %[a]\n\t" - "UMULL r9, r10, %[r], r3\n\t" - "UMAAL r9, r12, %[a], r2\n\t" + "UMAAL r8, r11, r1, r1\n\t" + "UMULL r9, r10, r0, r3\n\t" + "UMAAL r9, r12, r1, r2\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STRD r8, r9, [sp, #8]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r10, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r3\n\t" + "UMAAL r9, r10, r0, r4\n\t" + "UMAAL r9, r12, r1, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r2, r2\n\t" "STR r9, [sp, #16]\n\t" - "UMULL r9, r8, %[r], r5\n\t" - "UMAAL r9, r12, %[a], r4\n\t" + "UMULL r9, r8, r0, r5\n\t" + "UMAAL r9, r12, r1, r4\n\t" "UMAAL r9, r10, r2, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STR r9, [sp, #20]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r8, %[r], r6\n\t" - "UMAAL r9, r12, %[a], 
r5\n\t" + "UMAAL r9, r8, r0, r6\n\t" + "UMAAL r9, r12, r1, r5\n\t" "UMAAL r9, r10, r2, r4\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r3, r3\n\t" "STR r9, [sp, #24]\n\t" - "UMULL %[r], r9, %[r], r7\n\t" - "UMAAL %[r], r8, %[a], r6\n\t" - "UMAAL %[r], r12, r2, r5\n\t" - "UMAAL %[r], r10, r3, r4\n\t" - "ADCS %[r], %[r], %[r]\n\t" - "UMAAL %[r], r11, lr, lr\n\t" + "UMULL r0, r9, r0, r7\n\t" + "UMAAL r0, r8, r1, r6\n\t" + "UMAAL r0, r12, r2, r5\n\t" + "UMAAL r0, r10, r3, r4\n\t" + "ADCS r0, r0, r0\n\t" + "UMAAL r0, r11, lr, lr\n\t" /* R[7] = r0 */ - "UMAAL r9, r8, %[a], r7\n\t" + "UMAAL r9, r8, r1, r7\n\t" "UMAAL r9, r10, r2, r6\n\t" "UMAAL r12, r9, r3, r5\n\t" "ADCS r12, r12, r12\n\t" @@ -30096,10 +31834,10 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) "ADCS r3, r2, r2\n\t" "UMAAL r10, r3, r5, r5\n\t" /* R[10] = r10 */ - "MOV %[a], lr\n\t" - "UMAAL %[a], r8, r4, r7\n\t" - "UMAAL %[a], r9, r5, r6\n\t" - "ADCS r4, %[a], %[a]\n\t" + "MOV r1, lr\n\t" + "UMAAL r1, r8, r4, r7\n\t" + "UMAAL r1, r9, r5, r6\n\t" + "ADCS r4, r1, r1\n\t" "UMAAL r3, r4, lr, lr\n\t" /* R[11] = r3 */ "UMAAL r8, r9, r5, r7\n\t" @@ -30118,14 +31856,14 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) /* R[15] = r7 */ "LDR lr, [sp, #28]\n\t" "ADD lr, lr, #0x1c\n\t" - "STM lr!, {%[r], r12}\n\t" + "STM lr!, {r0, r12}\n\t" "STM lr!, {r11}\n\t" "STM lr!, {r10}\n\t" "STM lr!, {r3, r4, r8, r9}\n\t" "STM lr!, {r7}\n\t" "SUB lr, lr, #0x40\n\t" - "LDM sp, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" - "STM lr, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" + "LDM sp, {r0, r1, r2, r3, r4, r5, r6}\n\t" + "STM lr, {r0, r1, r2, r3, r4, r5, r6}\n\t" "ADD sp, sp, #0x20\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -30142,18 +31880,24 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x20\n\t" "\n" "L_sp_256_add_8_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -30184,11 +31928,17 @@ static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -30221,10 +31971,16 @@ static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* * a The number to convert. * m The modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x18\n\t" @@ -30232,7 +31988,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di /* Clear overflow and underflow */ "MOV r11, #0x0\n\t" "MOV r12, #0x0\n\t" - "# t[0] = 1 1 0 -1 -1 -1 -1 0\n\t" + /* t[0] = 1 1 0 -1 -1 -1 -1 0 */ "ADDS r10, r2, r3\n\t" "ADC r11, r11, #0x0\n\t" "SUBS r10, r10, r5\n\t" @@ -30247,7 +32003,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r10, [sp]\n\t" "neg r12, r12\n\t" "MOV r10, #0x0\n\t" - "# t[1] = 0 1 1 0 -1 -1 -1 -1\n\t" + /* t[1] = 0 1 1 0 -1 -1 -1 -1 */ "ADDS r11, r11, r3\n\t" "ADC r10, r10, #0x0\n\t" "ADDS r11, r11, r4\n\t" @@ -30266,7 +32022,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r11, [sp, #4]\n\t" "neg r12, r12\n\t" "MOV r11, #0x0\n\t" - "# t[2] = 0 0 1 1 0 -1 -1 -1\n\t" + /* t[2] = 0 0 1 1 0 -1 -1 -1 */ "ADDS r10, r10, r4\n\t" "ADC r11, r11, #0x0\n\t" "ADDS r10, r10, r5\n\t" @@ -30283,7 +32039,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r10, [sp, #8]\n\t" "neg r12, r12\n\t" "MOV r10, #0x0\n\t" - "# t[3] = -1 -1 0 2 2 1 0 -1\n\t" + /* t[3] = -1 -1 0 2 2 1 0 -1 */ "ADDS r11, r11, r5\n\t" "ADC r10, r10, #0x0\n\t" "ADDS r11, r11, r5\n\t" @@ -30306,7 +32062,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r11, [sp, #12]\n\t" "neg r12, r12\n\t" "MOV r11, #0x0\n\t" - "# t[4] = 0 -1 -1 0 2 2 1 0\n\t" + /* t[4] = 0 -1 -1 0 2 2 1 0 */ "ADDS r10, r10, r6\n\t" "ADC r11, r11, #0x0\n\t" "ADDS r10, r10, r6\n\t" @@ -30327,7 +32083,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r10, [sp, #16]\n\t" "neg r12, r12\n\t" "MOV r10, #0x0\n\t" - "# t[5] = 0 0 -1 -1 0 2 2 1\n\t" + /* t[5] = 0 0 -1 -1 0 2 2 1 */ "ADDS r11, r11, r7\n\t" "ADC r10, r10, #0x0\n\t" "ADDS r11, r11, r7\n\t" @@ -30348,7 +32104,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r11, [sp, #20]\n\t" "neg 
r12, r12\n\t" "MOV r11, #0x0\n\t" - "# t[6] = -1 -1 0 0 0 1 3 2\n\t" + /* t[6] = -1 -1 0 0 0 1 3 2 */ "ADDS r10, r10, r7\n\t" "ADC r11, r11, #0x0\n\t" "ADDS r10, r10, r8\n\t" @@ -30371,7 +32127,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "MOV r8, r10\n\t" "neg r12, r12\n\t" "MOV r10, #0x0\n\t" - "# t[7] = 1 0 -1 -1 -1 -1 0 3\n\t" + /* t[7] = 1 0 -1 -1 -1 -1 0 3 */ "ADDS r11, r11, r2\n\t" "ADC r10, r10, #0x0\n\t" "ADDS r11, r11, r9\n\t" @@ -30447,7 +32203,11 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ return (uint32_t)(size_t)r; } @@ -30647,6 +32407,9 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) } #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -30656,11 +32419,17 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) * m Modulus (prime). * mp Montgomery multiplier. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -31119,11 +32888,22 @@ static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -31133,109 +32913,120 @@ static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * m Modulus (prime). * mp Montgomery multiplier. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x4c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "STRD %[r], %[a], [sp, #68]\n\t" +#else + "STR %[r], [sp, #68]\n\t" + "STR %[a], [sp, #72]\n\t" +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[b]\n\t" - "LDM %[a], {%[r], %[a], %[b], r3}\n\t" + "LDM %[a], {r0, r1, r2, r3}\n\t" "LDM lr!, {r4, r5, r6}\n\t" - "UMULL r10, r11, %[r], r4\n\t" - "UMULL r12, r7, %[a], r4\n\t" - "UMAAL r11, r12, %[r], r5\n\t" - "UMULL r8, r9, %[b], r4\n\t" - "UMAAL r12, r8, %[a], r5\n\t" - "UMAAL r12, r7, %[r], r6\n\t" + "UMULL r10, r11, r0, r4\n\t" + "UMULL r12, r7, r1, r4\n\t" + "UMAAL r11, r12, r0, r5\n\t" + "UMULL r8, r9, r2, r4\n\t" + "UMAAL r12, r8, r1, r5\n\t" + "UMAAL r12, r7, r0, r6\n\t" "UMAAL r8, r9, r3, r4\n\t" "STM sp, {r10, r11, r12}\n\t" - "UMAAL r7, r8, %[b], r5\n\t" + "UMAAL r7, r8, r2, r5\n\t" "LDM lr!, {r4}\n\t" - "UMULL r10, r11, %[a], r6\n\t" - "UMAAL r8, r9, %[b], r6\n\t" - "UMAAL r7, r10, %[r], r4\n\t" + "UMULL r10, r11, r1, r6\n\t" + "UMAAL r8, r9, r2, r6\n\t" + "UMAAL r7, r10, r0, r4\n\t" "UMAAL r8, r11, r3, r5\n\t" "STR r7, [sp, #12]\n\t" - "UMAAL r8, r10, %[a], r4\n\t" + "UMAAL r8, r10, r1, r4\n\t" "UMAAL r9, r11, r3, r6\n\t" - "UMAAL r9, r10, %[b], r4\n\t" + "UMAAL r9, r10, r2, r4\n\t" "UMAAL r10, r11, r3, r4\n\t" "LDM lr, {r4, r5, r6, r7}\n\t" "MOV r12, #0x0\n\t" - "UMLAL r8, r12, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r4\n\t" - "UMAAL r10, r12, %[b], r4\n\t" + "UMLAL r8, r12, r0, r4\n\t" + "UMAAL r9, r12, r1, r4\n\t" + "UMAAL r10, r12, r2, r4\n\t" "UMAAL r11, r12, r3, r4\n\t" "MOV r4, #0x0\n\t" - "UMLAL r9, r4, %[r], r5\n\t" - "UMAAL r10, r4, %[a], r5\n\t" - "UMAAL r11, r4, %[b], r5\n\t" + "UMLAL r9, r4, r0, r5\n\t" + "UMAAL r10, r4, r1, r5\n\t" + "UMAAL r11, r4, r2, r5\n\t" "UMAAL r12, r4, r3, r5\n\t" "MOV r5, #0x0\n\t" - "UMLAL r10, r5, %[r], r6\n\t" - "UMAAL r11, r5, %[a], r6\n\t" - "UMAAL r12, r5, %[b], r6\n\t" + "UMLAL r10, r5, r0, r6\n\t" + "UMAAL r11, r5, r1, r6\n\t" + "UMAAL r12, r5, r2, r6\n\t" "UMAAL r4, r5, r3, r6\n\t" "MOV r6, #0x0\n\t" - "UMLAL r11, r6, %[r], r7\n\t" - "LDR %[r], [sp, #72]\n\t" - "UMAAL r12, r6, %[a], r7\n\t" - "ADD %[r], %[r], #0x10\n\t" - "UMAAL r4, r6, %[b], r7\n\t" + "UMLAL r11, r6, r0, r7\n\t" + "LDR r0, [sp, #72]\n\t" + "UMAAL r12, r6, r1, r7\n\t" + "ADD r0, r0, #0x10\n\t" + "UMAAL r4, r6, r2, r7\n\t" "SUB lr, lr, #0x10\n\t" "UMAAL r5, r6, r3, r7\n\t" - "LDM %[r], {%[r], %[a], %[b], r3}\n\t" + "LDM r0, {r0, r1, r2, r3}\n\t" "STR r6, [sp, #64]\n\t" "LDM lr!, {r6}\n\t" "MOV r7, #0x0\n\t" - "UMLAL r8, r7, %[r], r6\n\t" - "UMAAL r9, r7, %[a], r6\n\t" + "UMLAL r8, r7, r0, r6\n\t" + "UMAAL r9, r7, r1, r6\n\t" "STR r8, [sp, #16]\n\t" - "UMAAL r10, r7, %[b], r6\n\t" + "UMAAL r10, r7, r2, r6\n\t" "UMAAL r11, r7, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r8, #0x0\n\t" - 
"UMLAL r9, r8, %[r], r6\n\t" - "UMAAL r10, r8, %[a], r6\n\t" + "UMLAL r9, r8, r0, r6\n\t" + "UMAAL r10, r8, r1, r6\n\t" "STR r9, [sp, #20]\n\t" - "UMAAL r11, r8, %[b], r6\n\t" + "UMAAL r11, r8, r2, r6\n\t" "UMAAL r12, r8, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r9, #0x0\n\t" - "UMLAL r10, r9, %[r], r6\n\t" - "UMAAL r11, r9, %[a], r6\n\t" + "UMLAL r10, r9, r0, r6\n\t" + "UMAAL r11, r9, r1, r6\n\t" "STR r10, [sp, #24]\n\t" - "UMAAL r12, r9, %[b], r6\n\t" + "UMAAL r12, r9, r2, r6\n\t" "UMAAL r4, r9, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r10, #0x0\n\t" - "UMLAL r11, r10, %[r], r6\n\t" - "UMAAL r12, r10, %[a], r6\n\t" + "UMLAL r11, r10, r0, r6\n\t" + "UMAAL r12, r10, r1, r6\n\t" "STR r11, [sp, #28]\n\t" - "UMAAL r4, r10, %[b], r6\n\t" + "UMAAL r4, r10, r2, r6\n\t" "UMAAL r5, r10, r3, r6\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r12, r7, %[r], r11\n\t" - "UMAAL r4, r7, %[a], r11\n\t" + "UMAAL r12, r7, r0, r11\n\t" + "UMAAL r4, r7, r1, r11\n\t" "LDR r6, [sp, #64]\n\t" - "UMAAL r5, r7, %[b], r11\n\t" + "UMAAL r5, r7, r2, r11\n\t" "UMAAL r6, r7, r3, r11\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r4, r8, %[r], r11\n\t" - "UMAAL r5, r8, %[a], r11\n\t" - "UMAAL r6, r8, %[b], r11\n\t" + "UMAAL r4, r8, r0, r11\n\t" + "UMAAL r5, r8, r1, r11\n\t" + "UMAAL r6, r8, r2, r11\n\t" "UMAAL r7, r8, r3, r11\n\t" "LDM lr, {r11, lr}\n\t" - "UMAAL r5, r9, %[r], r11\n\t" - "UMAAL r6, r10, %[r], lr\n\t" - "UMAAL r6, r9, %[a], r11\n\t" - "UMAAL r7, r10, %[a], lr\n\t" - "UMAAL r7, r9, %[b], r11\n\t" - "UMAAL r8, r10, %[b], lr\n\t" + "UMAAL r5, r9, r0, r11\n\t" + "UMAAL r6, r10, r0, lr\n\t" + "UMAAL r6, r9, r1, r11\n\t" + "UMAAL r7, r10, r1, lr\n\t" + "UMAAL r7, r9, r2, r11\n\t" + "UMAAL r8, r10, r2, lr\n\t" "UMAAL r8, r9, r3, r11\n\t" "UMAAL r9, r10, r3, lr\n\t" "MOV r3, r12\n\t" @@ -31369,12 +33160,23 @@ static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", "r8", "r9", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #endif #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) * * r Result of squaring. @@ -31382,10 +33184,16 @@ static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * m Modulus (prime). * mp Montgomery multiplier. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -31736,11 +33544,22 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) * * r Result of squaring. @@ -31748,57 +33567,63 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * m Modulus (prime). * mp Montgomery multiplier. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" "STR %[r], [sp, #64]\n\t" - "LDM %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" - "UMULL r9, r10, %[r], %[r]\n\t" - "UMULL r11, r12, %[r], %[a]\n\t" + "LDM %[a], {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" + "UMULL r9, r10, r0, r0\n\t" + "UMULL r11, r12, r0, r1\n\t" "ADDS r11, r11, r11\n\t" "MOV lr, #0x0\n\t" "UMAAL r10, r11, lr, lr\n\t" "STM sp, {r9, r10}\n\t" "MOV r8, lr\n\t" - "UMAAL r8, r12, %[r], r2\n\t" + "UMAAL r8, r12, r0, r2\n\t" "ADCS r8, r8, r8\n\t" - "UMAAL r8, r11, %[a], %[a]\n\t" - "UMULL r9, r10, %[r], r3\n\t" - "UMAAL r9, r12, %[a], r2\n\t" + "UMAAL r8, r11, r1, r1\n\t" + "UMULL r9, r10, r0, r3\n\t" + "UMAAL r9, r12, r1, r2\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STRD r8, r9, [sp, #8]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r10, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r3\n\t" + "UMAAL r9, r10, r0, r4\n\t" + "UMAAL r9, r12, r1, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r2, r2\n\t" "STR r9, [sp, #16]\n\t" - "UMULL r9, r8, %[r], r5\n\t" - "UMAAL r9, r12, %[a], r4\n\t" + "UMULL r9, r8, r0, r5\n\t" + "UMAAL r9, r12, r1, r4\n\t" "UMAAL r9, r10, r2, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STR r9, [sp, #20]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r8, %[r], r6\n\t" - "UMAAL r9, r12, %[a], r5\n\t" + "UMAAL r9, r8, r0, r6\n\t" + "UMAAL r9, r12, r1, r5\n\t" "UMAAL r9, r10, r2, r4\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r3, r3\n\t" "STR r9, [sp, #24]\n\t" - "UMULL %[r], r9, %[r], r7\n\t" - "UMAAL %[r], r8, %[a], r6\n\t" - "UMAAL %[r], r12, r2, r5\n\t" - "UMAAL %[r], 
r10, r3, r4\n\t" - "ADCS %[r], %[r], %[r]\n\t" - "UMAAL %[r], r11, lr, lr\n\t" + "UMULL r0, r9, r0, r7\n\t" + "UMAAL r0, r8, r1, r6\n\t" + "UMAAL r0, r12, r2, r5\n\t" + "UMAAL r0, r10, r3, r4\n\t" + "ADCS r0, r0, r0\n\t" + "UMAAL r0, r11, lr, lr\n\t" /* R[7] = r0 */ - "UMAAL r9, r8, %[a], r7\n\t" + "UMAAL r9, r8, r1, r7\n\t" "UMAAL r9, r10, r2, r6\n\t" "UMAAL r12, r9, r3, r5\n\t" "ADCS r12, r12, r12\n\t" @@ -31816,10 +33641,10 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "ADCS r3, r2, r2\n\t" "UMAAL r10, r3, r5, r5\n\t" /* R[10] = r10 */ - "MOV %[a], lr\n\t" - "UMAAL %[a], r8, r4, r7\n\t" - "UMAAL %[a], r9, r5, r6\n\t" - "ADCS r4, %[a], %[a]\n\t" + "MOV r1, lr\n\t" + "UMAAL r1, r8, r4, r7\n\t" + "UMAAL r1, r9, r5, r6\n\t" + "ADCS r4, r1, r1\n\t" "UMAAL r3, r4, lr, lr\n\t" /* R[11] = r3 */ "UMAAL r8, r9, r5, r7\n\t" @@ -31838,7 +33663,7 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit /* R[15] = r7 */ "MOV lr, sp\n\t" "ADD lr, lr, #0x1c\n\t" - "STM lr!, {%[r], r12}\n\t" + "STM lr!, {r0, r12}\n\t" "STM lr!, {r11}\n\t" "STM lr!, {r10}\n\t" "STM lr!, {r3, r4, r8, r9}\n\t" @@ -31971,8 +33796,16 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #endif @@ -32079,16 +33912,22 @@ static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_256_cmp_8(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x1c\n\t" "\n" @@ -32221,12 +34060,18 @@ static sp_int32 sp_256_cmp_8(const sp_digit* a_p, const sp_digit* b_p) * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -32265,12 +34110,18 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_d * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -32321,11 +34172,17 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_d * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -32428,11 +34285,17 @@ static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
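/* A sketch of the masked conditional subtract above: the mask m is 0 or
 * all-ones, so b & m is either 0 or b and no branch is taken; the return
 * value is the final borrow. Assumes int64_t right shift is arithmetic;
 * illustrative, not the wolfSSL API. */
#include <stdint.h>

static uint32_t cond_sub_ct(uint32_t* r, const uint32_t* a,
                            const uint32_t* b, uint32_t m, int n)
{
    int64_t t = 0;
    int i;
    for (i = 0; i < n; i++) {
        t += (int64_t)a[i] - (b[i] & m);
        r[i] = (uint32_t)t;
        t >>= 32;                   /* -1 while a borrow is outstanding */
    }
    return (uint32_t)t;             /* 0, or all-ones if a < (b & m) */
}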
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -32518,9 +34381,15 @@ static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -32658,8 +34527,16 @@ static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp : : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #ifdef WOLFSSL_SP_NO_UMAAL @@ -32669,11 +34546,17 @@ static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -32776,11 +34659,17 @@ static void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_di * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
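/* For context, a sketch of deriving mp = -m[0]^-1 mod 2^32 by Hensel
 * lifting. For the P-256 prime 2^256 - 2^224 + 2^192 + 2^96 - 1 the low
 * limb is 0xffffffff, giving mp == 1, which is why the dedicated
 * reduction above can hard-code the modulus and ignore its m and mp
 * parameters. Illustrative helper, not the wolfSSL API. */
#include <stdint.h>

static uint32_t mont_mp(uint32_t m0)       /* m0 = low limb, must be odd */
{
    uint32_t x = m0;                       /* inverse of m0 modulo 2^3 */
    int i;
    for (i = 0; i < 4; i++)                /* 3 -> 6 -> 12 -> 24 -> 48 bits */
        x *= 2U - m0 * x;                  /* Newton step, all mod 2^32 */
    return (uint32_t)0U - x;               /* mp = -m0^-1 mod 2^32 */
}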
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -32907,11 +34796,17 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV lr, #0x0\n\t" @@ -32953,7 +34848,11 @@ static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Double a Montgomery form number (r = a + a % m). @@ -32962,10 +34861,16 @@ static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a Number to double in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r2, #0x0\n\t" @@ -33003,7 +34908,11 @@ static void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Triple a Montgomery form number (r = a + a + a % m). @@ -33012,10 +34921,16 @@ static void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a Number to triple in Montgomery form. * m Modulus (prime). 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -33085,7 +35000,11 @@ static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3", "r12" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Subtract two Montgomery form numbers (r = a - b % m). @@ -33095,11 +35014,17 @@ static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * b Number to subtract with in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV lr, #0x0\n\t" @@ -33139,7 +35064,11 @@ static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) @@ -33148,11 +35077,17 @@ static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a Number to divide. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_256_mont_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r4, r5, r6, r7}\n\t" @@ -36706,9 +38641,15 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, * * a A single precision integer. 
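/* A sketch of the branch-free shape shared by the mont_add/dbl/tpl/sub
 * routines above: compute a + b - m with a signed carry, then add m back
 * under an all-ones mask if the subtraction borrowed. Assumes inputs
 * already reduced below m and an arithmetic right shift on int64_t; kept
 * generic where the asm hard-codes the P-256 modulus (hence the unused m
 * parameters and the (void) casts). */
#include <stdint.h>

static void mod_add_ct(uint32_t* r, const uint32_t* a, const uint32_t* b,
                       const uint32_t* m, int n)
{
    int64_t t = 0;
    uint64_t c = 0;
    uint32_t mask;
    int i;
    for (i = 0; i < n; i++) {              /* r = a + b - m */
        t += (int64_t)a[i] + b[i] - m[i];
        r[i] = (uint32_t)t;
        t >>= 32;                          /* keeps the sign of the borrow */
    }
    mask = (uint32_t)t;                    /* 0, or all-ones on borrow */
    for (i = 0; i < n; i++) {              /* r += m & mask */
        c += (uint64_t)r[i] + (m[i] & mask);
        r[i] = (uint32_t)c;
        c >>= 32;
    }
}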
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_add_one_8(sp_digit* a_p) +#else +static void sp_256_add_one_8(sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r1, r2, r3, r4}\n\t" @@ -37104,10 +39045,16 @@ int sp_ecc_secret_gen_256_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -37143,10 +39090,16 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -37179,11 +39132,17 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -37226,11 +39185,17 @@ static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. 
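/* A sketch of the single-digit multiply the mul_d routines above unroll
 * with UMULL/UMAAL: each step adds the 64-bit product to the running
 * carry, and the final carry becomes the extra top limb. Illustrative
 * name, not the wolfSSL API. */
#include <stdint.h>

static uint32_t mul_d(uint32_t* r, const uint32_t* a, uint32_t b, int n)
{
    uint64_t t = 0;
    int i;
    for (i = 0; i < n; i++) {
        t += (uint64_t)a[i] * b;    /* product plus incoming carry */
        r[i] = (uint32_t)t;
        t >>= 32;
    }
    return (uint32_t)t;             /* the top word, r[n] in the asm */
}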
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -37290,11 +39255,17 @@ static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -37349,11 +39320,17 @@ static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -38058,11 +40035,17 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -38099,11 +40082,17 @@ static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -38129,10 +40118,16 @@ static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* } #endif /* WOLFSSL_SP_SMALL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_rshift1_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -38173,11 +40168,17 @@ static void sp_256_rshift1_8(sp_digit* r_p, const sp_digit* a_p) * a Number to divide. * m Modulus. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_div2_mod_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -38237,9 +40238,15 @@ static void sp_256_div2_mod_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static int sp_256_num_bits_8(const sp_digit* a_p) +#else +static int sp_256_num_bits_8(const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR r1, [%[a], #28]\n\t" @@ -39439,11 +41446,17 @@ static const sp_digit p384_b[12] = { * a A single precision integer. * b A single precision integer. 
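The num_bits helpers return the index of the highest set bit plus one; the assembly starts from the top limb (offset #28 here, i.e. limb 7) and stops at the first non-zero word. A portable C model under those assumptions, names hypothetical:

#include <stdint.h>

/* Hypothetical model of sp_256_num_bits_8-style functions. */
static int num_bits_words(const uint32_t* a, int words)
{
    int i;
    for (i = words - 1; i >= 0; i--) {
        if (a[i] != 0) {
            int b = 32;
            while ((a[i] & (1U << (b - 1))) == 0)  /* find the top set bit */
                b--;
            return i * 32 + b;
        }
    }
    return 0;   /* a == 0 */
}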
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x60\n\t" @@ -39516,11 +41529,17 @@ static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x30\n\t" @@ -40538,10 +42557,16 @@ static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x60\n\t" @@ -40638,10 +42663,16 @@ static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x30\n\t" @@ -41335,18 +43366,24 @@ static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x30\n\t" "\n" "L_sp_384_add_12_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -41377,11 +43414,17 @@ static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -41721,12 +43764,18 @@ static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -41765,12 +43814,18 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * b A single precision number to subtract. * m Mask value to apply. 
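The looped adders above keep the inter-block carry in r3 as 0 or 1: `ADDS r3, r3, #0xffffffff` converts a saved 1 back into the carry flag (1 + 0xffffffff wraps to zero with carry out) while a saved 0 leaves it clear, so the ADCS chain resumes exactly where the previous four-word block stopped. #0x-1 and #0xffffffff encode the same word; the hunks switch to the spelling every assembler accepts. In C the whole loop reduces to this sketch (uint32_t for sp_digit, helper name made up):

#include <stdint.h>

/* Hypothetical model of the word-loop adders: r = a + b, returns the carry. */
static uint32_t add_words_with_carry(uint32_t* r, const uint32_t* a,
                                     const uint32_t* b, int n)
{
    uint32_t c = 0;
    int i;
    for (i = 0; i < n; i++) {
        uint64_t t = (uint64_t)a[i] + b[i] + c;
        r[i] = (uint32_t)t;
        c = (uint32_t)(t >> 32);   /* 0 or 1, like the saved carry flag */
    }
    return c;
}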
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -41834,11 +43889,17 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -41973,11 +44034,17 @@ static void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p, sp_digit m * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -42225,16 +44292,22 @@ static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_384_cmp_12(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_384_cmp_12(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x2c\n\t" "\n" @@ -42449,12 +44522,18 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ sp_digit o; @@ -42468,11 +44547,17 @@ static void sp_384_mont_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a Number to double in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_dbl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ sp_digit o; @@ -42486,11 +44571,17 @@ static void sp_384_mont_dbl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a Number to triple in Montgomery form. * m Modulus (prime). 
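sp_384_cmp_12 builds its negative/zero/positive answer from all-ones masks instead of branches: once the first differing limb (scanning from the top) fixes the result, a mask blocks every later update. A C model of the idea; a compiler is free to reintroduce branches here, so treat it as illustration only, with hypothetical names:

#include <stdint.h>

/* Hypothetical model of the cmp helpers: +1, 0 or -1. */
static int32_t cmp_words_ct(const uint32_t* a, const uint32_t* b, int n)
{
    int32_t r = 0;
    uint32_t done = 0;   /* becomes all ones once a difference is seen */
    int i;
    for (i = n - 1; i >= 0; i--) {
        uint32_t gt = (uint32_t)0 - (uint32_t)(a[i] > b[i]);
        uint32_t lt = (uint32_t)0 - (uint32_t)(a[i] < b[i]);
        r += (int32_t)(1 & gt & ~done);   /* first difference sets +1 ... */
        r -= (int32_t)(1 & lt & ~done);   /* ... or -1, later ones ignored */
        done |= gt | lt;
    }
    return r;
}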
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_tpl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ sp_digit o; @@ -42507,11 +44598,17 @@ static void sp_384_mont_tpl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -42548,11 +44645,17 @@ static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -42594,12 +44697,18 @@ static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit * b A single precision number to add. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -42607,7 +44716,7 @@ static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_ "MOV r4, #0x0\n\t" "\n" "L_sp_384_cond_add_12_words_%=:\n\t" - "ADDS r5, r5, #0x-1\n\t" + "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" "AND r7, r7, %[m]\n\t" @@ -42638,12 +44747,18 @@ static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * b A single precision number to add. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -42705,12 +44820,18 @@ static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * b Number to subtract with in Montgomery form. * m Modulus (prime). 
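The cond_add and cond_sub helpers take m as a precomputed mask, either 0 or 0xffffffff, so `AND r7, r7, %[m]` selects the operand or zero and the carry chain runs identically in both cases, with no branch on the condition. The C equivalent, under the same mask convention and with a hypothetical name:

#include <stdint.h>

/* Hypothetical model of the cond_add helpers: r = a + (b & m). */
static uint32_t cond_add_words(uint32_t* r, const uint32_t* a,
                               const uint32_t* b, uint32_t m, int n)
{
    uint32_t c = 0;
    int i;
    for (i = 0; i < n; i++) {
        uint64_t t = (uint64_t)a[i] + (b[i] & m) + c;   /* m picks b or 0 */
        r[i] = (uint32_t)t;
        c = (uint32_t)(t >> 32);
    }
    return c;   /* final carry out */
}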
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ sp_digit o; @@ -42721,10 +44842,16 @@ static void sp_384_mont_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi #ifdef WOLFSSL_SP_SMALL #else #endif /* WOLFSSL_SP_SMALL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_rshift1_12(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3}\n\t" @@ -46360,9 +48487,15 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, * * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_add_one_12(sp_digit* a_p) +#else +static void sp_384_add_one_12(sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r1, r2, r3, r4}\n\t" @@ -46764,10 +48897,16 @@ int sp_ecc_secret_gen_384_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -46803,10 +48942,16 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer and result. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -46846,11 +48991,17 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -46893,11 +49044,17 @@ static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -46977,11 +49134,17 @@ static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -47036,11 +49199,17 @@ static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
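Both div_384_word_12 variants approximate one 64-by-32 division from 16-bit partial quotients or shift-and-subtract steps, which is why the comment warns the answer may be one too large; callers correct it by comparison afterwards. The exact operation being approximated, assuming d1 < div so the quotient fits in 32 bits:

#include <stdint.h>

/* Exact reference for the div_*_word helpers (the asm approximates this). */
static uint32_t div_word_ref(uint32_t d1, uint32_t d0, uint32_t div)
{
    uint64_t d = ((uint64_t)d1 << 32) | d0;   /* the two-limb dividend */
    return (uint32_t)(d / div);
}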
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -47715,11 +49884,17 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W * a Number to divide. * m Modulus. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_div2_mod_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r4}\n\t" @@ -47817,9 +49992,15 @@ static void sp_384_div2_mod_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static int sp_384_num_bits_12(const sp_digit* a_p) +#else +static int sp_384_num_bits_12(const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR r1, [%[a], #44]\n\t" @@ -49117,11 +51298,17 @@ static const sp_digit p521_b[17] = { * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x88\n\t" @@ -49197,11 +51384,17 @@ static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -51233,10 +53426,16 @@ static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x88\n\t" @@ -51336,10 +53535,16 @@ static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -52617,18 +54822,24 @@ static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x40\n\t" "\n" "L_sp_521_add_17_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -52644,7 +54855,7 @@ static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit #else "BNE.N L_sp_521_add_17_word_%=\n\t" #endif - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a], {r4}\n\t" "LDM %[b], {r8}\n\t" "ADCS r4, r4, r8\n\t" @@ -52665,11 +54876,17 @@ static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -52936,12 +55153,18 @@ static int sp_521_point_to_ecc_point_17(const sp_point_521* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -52980,12 +55203,18 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_ * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -53065,9 +55294,15 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_ * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -53177,8 +55412,16 @@ static void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p, sp_digit m : : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #ifdef WOLFSSL_SP_NO_UMAAL @@ -53188,11 +55431,17 @@ static void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p, sp_digit m * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -53448,11 +55697,17 @@ static void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_d * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
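The mont_reduce routines are word-wise Montgomery reduction; the umlal/umaal encodings just fuse the multiply, accumulate and carry into single instructions. A C sketch of the algorithm they unroll, with uint32_t for sp_digit and the final conditional subtraction left to the caller, as in the assembly:

#include <stdint.h>

/* Hypothetical sketch: a has 2*n limbs, mp is -m^-1 mod 2^32. */
static void mont_reduce_ref(uint32_t* a, const uint32_t* m, uint32_t mp, int n)
{
    uint32_t o = 0;   /* overflow out of the top limb */
    int i, j;
    for (i = 0; i < n; i++) {
        uint32_t mu = a[i] * mp;   /* chosen so adding mu*m zeroes limb i */
        uint32_t c = 0;
        uint64_t t;
        for (j = 0; j < n; j++) {
            t = (uint64_t)mu * m[j] + a[i + j] + c;
            a[i + j] = (uint32_t)t;
            c = (uint32_t)(t >> 32);
        }
        t = (uint64_t)a[i + n] + c + o;
        a[i + n] = (uint32_t)t;
        o = (uint32_t)(t >> 32);
    }
    /* result is a[n..2n-1]; the caller subtracts m once, keyed off o */
}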
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -53803,16 +56058,22 @@ static void sp_521_mont_inv_17(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_521_cmp_17(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_521_cmp_17(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x40\n\t" "\n" @@ -54082,11 +56343,17 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_521_mont_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" @@ -54153,7 +56420,11 @@ static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Double a Montgomery form number (r = a + a % m). @@ -54162,10 +56433,16 @@ static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a Number to double in Montgomery form. * m Modulus (prime). 
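sp_521_mont_add_17 and its dbl/tpl/sub siblings share one recipe: a full-width pass, then the overflow folded back in with a masked correction by the modulus. The 521-bit code folds the overflow per the prime's 2^521 - 1 shape; the generic pattern, roughly as the smaller curves use it, looks like this sketch (hypothetical name, uint32_t limbs):

#include <stdint.h>

/* Hypothetical sketch of the generic mont_add shape: r = a + b mod m. */
static void mont_add_ref(uint32_t* r, const uint32_t* a, const uint32_t* b,
                         const uint32_t* m, int n)
{
    uint32_t c = 0, mask;
    int i;
    for (i = 0; i < n; i++) {            /* r = a + b, tracking the carry */
        uint64_t t = (uint64_t)a[i] + b[i] + c;
        r[i] = (uint32_t)t;
        c = (uint32_t)(t >> 32);
    }
    mask = (uint32_t)0 - c;              /* all ones when the add overflowed */
    c = 0;
    for (i = 0; i < n; i++) {            /* conditionally subtract m */
        uint64_t t = (uint64_t)r[i] - (m[i] & mask) - c;
        r[i] = (uint32_t)t;
        c = (uint32_t)((t >> 32) & 1);
    }
}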
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_dbl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_521_mont_dbl_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r2, #0x0\n\t" @@ -54223,7 +56500,11 @@ static void sp_521_mont_dbl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Triple a Montgomery form number (r = a + a + a % m). @@ -54232,10 +56513,16 @@ static void sp_521_mont_dbl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a Number to triple in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r2, #0x0\n\t" @@ -54313,7 +56600,11 @@ static void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Subtract two Montgomery form numbers (r = a - b % m). @@ -54323,11 +56614,17 @@ static void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi * b Number to subtract with in Montgomery form. * m Modulus (prime). 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" @@ -54395,13 +56692,23 @@ static void sp_521_mont_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_rshift1_17(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_521_rshift1_17(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3}\n\t" @@ -58679,9 +60986,15 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, * * a A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_add_one_17(sp_digit* a_p) +#else +static void sp_521_add_one_17(sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r1, r2, r3, r4}\n\t" @@ -59085,11 +61398,17 @@ int sp_ecc_secret_gen_521_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, #endif /* HAVE_ECC_DHE */ #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_rshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_521_rshift_17(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x20\n\t" @@ -59184,11 +61503,17 @@ static void sp_521_rshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_lshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_521_lshift_17(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x1f\n\t" @@ -59300,11 +61625,17 @@ static void sp_521_lshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_lshift_34(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_521_lshift_34(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x1f\n\t" @@ -59524,10 +61855,16 @@ static void sp_521_lshift_34(sp_digit* r_p, const sp_digit* a_p, byte n_p) * a A single precision integer. * b A single precision integer. 
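sp_521_rshift_17 precomputes `RSB r7, %[n], #0x20` (that is, 32 - n) so each output word is stitched from two neighbours: r[i] = (a[i] >> n) | (a[i+1] << (32 - n)). A C model with a made-up name; the n == 0 guard avoids the undefined 32-bit shift that ARM's register-shift semantics make harmless in the assembly:

#include <stdint.h>

/* Hypothetical model of the variable right shift: r = a >> n, 0 <= n < 32. */
static void rshift_words(uint32_t* r, const uint32_t* a, int words,
                         unsigned int n)
{
    int i;
    for (i = 0; i < words - 1; i++)
        r[i] = (n == 0) ? a[i]
                        : ((a[i] >> n) | (a[i + 1] << (32 - n)));
    r[words - 1] = a[words - 1] >> n;   /* top word shifts in zeros */
}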
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -59568,10 +61905,16 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -59622,11 +61965,17 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -59669,11 +62018,17 @@ static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -59778,11 +62133,17 @@ static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -59837,11 +62198,17 @@ static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -60547,11 +62914,17 @@ int sp_ecc_sign_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -60593,11 +62966,17 @@ static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -60647,11 +63026,17 @@ static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a Number to divide. * m Modulus. 
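The div2_mod helpers halve a value modulo an odd modulus: an even a is simply shifted, while an odd a first has m added (the mask trick again) so the sum is even and the shift divides exactly; the carry out of that add becomes the new top bit. A C model with hypothetical names, uint32_t for sp_digit:

#include <stdint.h>

/* Hypothetical model of the div2_mod routines: r = a / 2 mod m, m odd. */
static void div2_mod_ref(uint32_t* r, const uint32_t* a, const uint32_t* m,
                         int n)
{
    uint32_t mask = (uint32_t)0 - (a[0] & 1);  /* all ones when a is odd */
    uint32_t c = 0;
    int i;
    for (i = 0; i < n; i++) {                  /* r = a + (m & mask) */
        uint64_t t = (uint64_t)a[i] + (m[i] & mask) + c;
        r[i] = (uint32_t)t;
        c = (uint32_t)(t >> 32);
    }
    for (i = 0; i < n - 1; i++)                /* r >>= 1 across the limbs */
        r[i] = (r[i] >> 1) | (r[i + 1] << 31);
    r[n - 1] = (r[n - 1] >> 1) | (c << 31);    /* carry becomes the top bit */
}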
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_div2_mod_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_521_div2_mod_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r4}\n\t" @@ -60784,9 +63169,15 @@ static void sp_521_div2_mod_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static int sp_521_num_bits_17(const sp_digit* a_p) +#else +static int sp_521_num_bits_17(const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR r1, [%[a], #64]\n\t" @@ -62032,11 +64423,17 @@ typedef struct sp_point_1024 { * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mul_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x40\n\t" @@ -63834,10 +66231,16 @@ static void sp_1024_mul_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_sqr_16(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x40\n\t" @@ -64981,11 +67384,17 @@ static void sp_1024_sqr_16(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -65030,10 +67439,16 @@ static sp_digit sp_1024_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -65106,11 +67521,17 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -65253,11 +67674,17 @@ SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_sub_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -65339,11 +67766,17 @@ SP_NOINLINE static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) * a A single precision integer. 
* b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" @@ -65414,10 +67847,16 @@ static void sp_1024_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_sqr_32(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" @@ -65600,10 +68039,16 @@ static const sp_point_1024 p1024_base = { * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -65643,12 +68088,18 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -65687,12 +68138,18 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -65824,18 +68281,24 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x80\n\t" "\n" "L_sp_1024_add_32_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -65867,11 +68330,17 @@ static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -65914,11 +68383,17 @@ static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -66098,11 +68573,17 @@ static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -66157,11 +68638,17 @@ static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -66251,16 +68738,22 @@ static void sp_1024_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_1024_cmp_32(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_1024_cmp_32(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x7c\n\t" "\n" @@ -66977,11 +69470,17 @@ static int sp_1024_point_to_ecc_point_32(const sp_point_1024* p, ecc_point* pm) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -67281,11 +69780,17 @@ static void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -67633,12 +70138,18 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, * b Second number to add in Montgomery form. * m Modulus (prime). 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -67805,11 +70316,17 @@ static void sp_1024_mont_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * a Number to double in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_dbl_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -67960,11 +70477,17 @@ static void sp_1024_mont_dbl_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * a Number to triple in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_tpl_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -68271,12 +70794,18 @@ static void sp_1024_mont_tpl_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * b Number to subtract with in Montgomery form. * m Modulus (prime). 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r4, r5, r6, r7}\n\t" @@ -68440,12 +70969,18 @@ static void sp_1024_mont_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * b A single precision number to add. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -68453,7 +70988,7 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp "MOV r4, #0x0\n\t" "\n" "L_sp_1024_cond_add_32_words_%=:\n\t" - "ADDS r5, r5, #0x-1\n\t" + "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" "AND r7, r7, %[m]\n\t" @@ -68484,12 +71019,18 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -68614,10 +71155,16 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp } #endif /* WOLFSSL_SP_SMALL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_rshift1_32(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_1024_rshift1_32(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3}\n\t" diff --git a/wolfcrypt/src/sp_x86_64_asm.S b/wolfcrypt/src/sp_x86_64_asm.S index 4c423bc82..6879391de 100644 --- a/wolfcrypt/src/sp_x86_64_asm.S +++ b/wolfcrypt/src/sp_x86_64_asm.S @@ -1,5 +1,5 @@ -/* sp_x86_64_asm - * +/* sp_x86_64_asm.S */ +/* * Copyright (C) 2006-2023 wolfSSL Inc. * * This file is part of wolfSSL. diff --git a/wolfcrypt/src/sp_x86_64_asm.asm b/wolfcrypt/src/sp_x86_64_asm.asm index 9e7523d7b..fa6558cc9 100644 --- a/wolfcrypt/src/sp_x86_64_asm.asm +++ b/wolfcrypt/src/sp_x86_64_asm.asm @@ -1,5 +1,5 @@ -; /* sp_x86_64_asm -; * +; /* sp_x86_64_asm.asm */ +; /* ; * Copyright (C) 2006-2023 wolfSSL Inc. ; * ; * This file is part of wolfSSL.
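
The hunks above are long but built from a handful of recurring idioms; portable C reference sketches of the main ones follow. First, every C-file hunk applies one dual-declaration shape, so it helps to see it once in isolation. The sketch below is hypothetical (my_add and its one-instruction body are invented for illustration): when WOLFSSL_NO_VAR_ASSIGN_REG is not defined, the parameters take _p names and are immediately pinned to fixed registers; when it is defined (for compilers that reject GCC-style register bindings on variables), the original parameter names are kept and the inline-asm operand constraints choose the registers instead.

#include <stdint.h>

#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
static uint32_t my_add(uint32_t a_p, uint32_t b_p)
#else
static uint32_t my_add(uint32_t a, uint32_t b)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    /* Pin the parameters to r0/r1 so the asm body can rely on them. */
    register uint32_t a __asm__ ("r0") = (uint32_t)a_p;
    register uint32_t b __asm__ ("r1") = (uint32_t)b_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Toy body; the "+r"/"r" constraints are what keep the fallback
         * path working when a and b are not pinned to registers. */
        "ADD %[a], %[a], %[b]\n\t"
        : [a] "+r" (a)
        : [b] "r" (b)
    );
    return a;
}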
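
div_521_word_17 and div_1024_word_32 both carry the note that the quotient may come out one too large. Their contract, and the single correction step that makes an approximate quotient exact, in portable C (helper names hypothetical; the uint64_t is for clarity and is exactly what the Thumb-2 code avoids):

#include <stdint.h>

/* (d1:d0) / div.  Callers are assumed to arrange d1 < div, so the
 * quotient fits in a single 32-bit word. */
static uint32_t div_word_exact(uint32_t d1, uint32_t d0, uint32_t div)
{
    uint64_t d = ((uint64_t)d1 << 32) | (uint64_t)d0;
    return (uint32_t)(d / div);
}

/* An approximate quotient q that is exact or exact+1 is fixed up with
 * one multiply and one compare. */
static uint32_t div_word_fixup(uint32_t q, uint32_t d1, uint32_t d0,
                               uint32_t div)
{
    uint64_t d = ((uint64_t)d1 << 32) | (uint64_t)d0;
    if ((uint64_t)q * div > d) {
        q--;
    }
    return q;
}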
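
sp_1024_cond_sub_32 and sp_1024_cond_add_32 take a mask m that is all-ones to apply the operation and zero to skip it, so the word-by-word work is identical either way and the choice cannot be observed through timing or the memory access pattern. A sketch of the subtract case (word count parameterized here, fixed at 32 in the assembly; the exact return encoding is an assumption):

#include <stdint.h>

/* r = a - (b & m) over n words, with m == 0 or m == 0xffffffff. */
static uint32_t cond_sub_words(uint32_t* r, const uint32_t* a,
                               const uint32_t* b, uint32_t m, int n)
{
    uint64_t t;
    uint32_t borrow = 0;
    int i;

    for (i = 0; i < n; i++) {
        t = (uint64_t)a[i] - (uint64_t)(b[i] & m) - borrow;
        r[i] = (uint32_t)t;
        borrow = (uint32_t)(t >> 32) & 1U; /* 1 when the word wrapped */
    }
    return borrow; /* borrow out of the top word */
}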
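
The two sp_1024_mont_reduce_32 bodies are size/speed variants of the same word-by-word Montgomery reduction. A readable reference (not the scheduling the assembly uses), with a holding a 2n-word product, m the n-word modulus, and mp = -m^-1 mod 2^32:

#include <stdint.h>

static uint32_t mont_reduce_ref(uint32_t* a, const uint32_t* m,
                                uint32_t mp, int n)
{
    uint32_t over = 0; /* carry spilling past a[i + n] */
    int i, j;

    for (i = 0; i < n; i++) {
        /* mu is chosen so that a[i] + mu * m[0] == 0 (mod 2^32). */
        uint32_t mu = a[i] * mp;
        uint64_t t = 0;

        for (j = 0; j < n; j++) {
            t += (uint64_t)mu * m[j] + a[i + j];
            a[i + j] = (uint32_t)t;
            t >>= 32;
        }
        t += (uint64_t)a[i + n] + over;
        a[i + n] = (uint32_t)t;
        over = (uint32_t)(t >> 32);
    }

    /* The low n words are now zero; the result is the top half. */
    for (i = 0; i < n; i++) {
        a[i] = a[i + n];
    }
    /* The result may still be >= m; 'over' (0 or 1) and a compare
     * against m drive one masked conditional subtract, as above. */
    return over;
}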
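
Finally, sp_521_num_bits_17 begins with a load at byte offset 64, i.e. word 16, the word that holds bit 521, and works downward. Its contract in portable C (helper name hypothetical):

#include <stdint.h>

/* Bit length: index of the highest set bit plus one, or 0 for zero.
 * n is the word count (17 for the P-521 code above). */
static int num_bits_ref(const uint32_t* a, int n)
{
    int i, b;

    for (i = n - 1; i >= 0; i--) {
        if (a[i] != 0) {
            uint32_t w = a[i];
            for (b = 0; w != 0; b++) {
                w >>= 1;
            }
            return (i * 32) + b;
        }
    }
    return 0;
}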