diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c index 1564a6f9d..48b5edc16 100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c @@ -44,6 +44,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -198,12 +199,18 @@ static const uint32_t* L_AES_Thumb2_te = L_AES_Thumb2_te_data; #endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT void AES_invert_key(unsigned char* ks, word32 rounds); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_invert_key(unsigned char* ks_p, word32 rounds_p) +#else +void AES_invert_key(unsigned char* ks, word32 rounds) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register unsigned char* ks asm ("r0") = (unsigned char*)ks_p; - register word32 rounds asm ("r1") = (word32)rounds_p; - register uint32_t* L_AES_Thumb2_te_c asm ("r2") = (uint32_t*)L_AES_Thumb2_te; - register uint32_t* L_AES_Thumb2_td_c asm ("r3") = (uint32_t*)L_AES_Thumb2_td; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register unsigned char* ks __asm__ ("r0") = (unsigned char*)ks_p; + register word32 rounds __asm__ ("r1") = (word32)rounds_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_c __asm__ ("r2") = (uint32_t*)L_AES_Thumb2_te; + register uint32_t* L_AES_Thumb2_td_c __asm__ ("r3") = (uint32_t*)L_AES_Thumb2_td; __asm__ __volatile__ ( "MOV r12, %[L_AES_Thumb2_te]\n\t" @@ -314,13 +321,19 @@ static const uint32_t L_AES_Thumb2_rcon[] = { void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char* ks_p) +#else +void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* key asm ("r0") = (const unsigned char*)key_p; - register word32 len asm ("r1") = (word32)len_p; - register unsigned char* ks asm ("r2") = (unsigned char*)ks_p; - register uint32_t* L_AES_Thumb2_te_c asm ("r3") = (uint32_t*)L_AES_Thumb2_te; - register uint32_t* L_AES_Thumb2_rcon_c asm ("r4") = (uint32_t*)&L_AES_Thumb2_rcon; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* key __asm__ ("r0") = (const unsigned char*)key_p; + register word32 len __asm__ ("r1") = (word32)len_p; + register unsigned char* ks __asm__ ("r2") = (unsigned char*)ks_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_c __asm__ ("r3") = (uint32_t*)L_AES_Thumb2_te; + register uint32_t* L_AES_Thumb2_rcon_c __asm__ ("r4") = (uint32_t*)&L_AES_Thumb2_rcon; __asm__ __volatile__ ( "MOV r8, %[L_AES_Thumb2_te]\n\t" @@ -534,12 +547,18 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char } void AES_encrypt_block(const uint32_t* te, int nr, int len, const uint32_t* ks); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, const uint32_t* ks_p) +#else +void AES_encrypt_block(const uint32_t* te, int nr, int len, const uint32_t* ks) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const uint32_t* te asm ("r0") = (const uint32_t*)te_p; - register int nr asm ("r1") = (int)nr_p; - register int len asm ("r2") = (int)len_p; - register const uint32_t* ks asm ("r3") = (const uint32_t*)ks_p; 
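The change repeated across these files is mechanical. The generated wrappers pin each parameter copy to a specific core register with the GCC/Clang extension `register T x __asm__("rN")`, which is what lets the hand-written Thumb-2 templates refer to fixed registers. IAR's compiler does not accept named register variables, so each file now defines WOLFSSL_NO_VAR_ASSIGN_REG under __IAR_SYSTEMS_ICC__ and every function gets two signatures: the `_p`-suffixed one with pinned locals, and a plain one that leaves the arguments wherever the AAPCS calling convention put them (the first four arrive in r0-r3 anyway). A minimal sketch of the idiom follows; my_incr() is a hypothetical stand-in, not a wolfSSL function, and it assumes a toolchain that accepts GCC-style extended asm, which these files already rely on:

#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void my_incr(unsigned int* v_p)
#else
void my_incr(unsigned int* v)
#endif
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    /* GCC/Clang extension: pin the local copy to the register the
     * asm template expects. IAR rejects this, hence the #ifndef. */
    register unsigned int* v __asm__ ("r0") = (unsigned int*)v_p;
#endif
    __asm__ __volatile__ (
        "LDR r2, [%[v]]\n\t"   /* load *v */
        "ADDS r2, r2, #1\n\t"  /* increment */
        "STR r2, [%[v]]\n\t"   /* store back */
        : [v] "+r" (v)
        :
        : "memory", "r2", "cc"
    );
}

Because the templates reach the parameters through symbolic operands such as %[v], the same asm body serves both configurations whenever it only touches its operands symbolically; only the declarations differ.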
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const uint32_t* te __asm__ ("r0") = (const uint32_t*)te_p; + register int nr __asm__ ("r1") = (int)nr_p; + register int len __asm__ ("r2") = (int)len_p; + register const uint32_t* ks __asm__ ("r3") = (const uint32_t*)ks_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n" @@ -762,14 +781,20 @@ static const uint32_t* L_AES_Thumb2_te_ecb = L_AES_Thumb2_te_data; #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) +#else +void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register uint32_t* L_AES_Thumb2_te_ecb_c asm ("r5") = (uint32_t*)L_AES_Thumb2_te_ecb; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_ecb_c __asm__ ("r5") = (uint32_t*)L_AES_Thumb2_te_ecb; __asm__ __volatile__ ( "MOV lr, %[in]\n\t" @@ -918,15 +943,21 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #ifdef HAVE_AES_CBC void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) +#else +void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register unsigned char* iv asm ("r5") = (unsigned char*)iv_p; - register uint32_t* L_AES_Thumb2_te_ecb_c asm ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; + register unsigned char* iv __asm__ ("r5") = (unsigned char*)iv_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ 
+ register uint32_t* L_AES_Thumb2_te_ecb_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; __asm__ __volatile__ ( "MOV r8, r4\n\t" @@ -1090,15 +1121,21 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #ifdef WOLFSSL_AES_COUNTER void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) +#else +void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p; - register uint32_t* L_AES_Thumb2_te_ecb_c asm ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; + register unsigned char* ctr __asm__ ("r5") = (unsigned char*)ctr_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_ecb_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; __asm__ __volatile__ ( "MOV r12, r4\n\t" @@ -1283,11 +1320,17 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #ifdef HAVE_AES_DECRYPT #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) void AES_decrypt_block(const uint32_t* td, int nr, const uint8_t* td4); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_decrypt_block(const uint32_t* td_p, int nr_p, const uint8_t* td4_p) +#else +void AES_decrypt_block(const uint32_t* td, int nr, const uint8_t* td4) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const uint32_t* td asm ("r0") = (const uint32_t*)td_p; - register int nr asm ("r1") = (int)nr_p; - register const uint8_t* td4 asm ("r2") = (const uint8_t*)td4_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const uint32_t* td __asm__ ("r0") = (const uint32_t*)td_p; + register int nr __asm__ ("r1") = (int)nr_p; + register const uint8_t* td4 __asm__ ("r2") = (const uint8_t*)td4_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n" @@ -1543,15 +1586,21 @@ static const unsigned char L_AES_Thumb2_td4[] = { #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) +#else +void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned 
char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register uint32_t* L_AES_Thumb2_td_ecb_c asm ("r5") = (uint32_t*)L_AES_Thumb2_td_ecb; - register unsigned char* L_AES_Thumb2_td4_c asm ("r6") = (unsigned char*)&L_AES_Thumb2_td4; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_td_ecb_c __asm__ ("r5") = (uint32_t*)L_AES_Thumb2_td_ecb; + register unsigned char* L_AES_Thumb2_td4_c __asm__ ("r6") = (unsigned char*)&L_AES_Thumb2_td4; __asm__ __volatile__ ( "MOV r8, r4\n\t" @@ -1697,16 +1746,22 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #ifdef HAVE_AES_CBC void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) +#else +void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register unsigned char* iv asm ("r5") = (unsigned char*)iv_p; - register uint32_t* L_AES_Thumb2_td_ecb_c asm ("r6") = (uint32_t*)L_AES_Thumb2_td_ecb; - register unsigned char* L_AES_Thumb2_td4_c asm ("r7") = (unsigned char*)&L_AES_Thumb2_td4; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; + register unsigned char* iv __asm__ ("r5") = (unsigned char*)iv_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_td_ecb_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_td_ecb; + register unsigned char* L_AES_Thumb2_td4_c __asm__ ("r7") = (unsigned char*)&L_AES_Thumb2_td4; __asm__ __volatile__ ( "MOV r8, r4\n\t" @@ -2033,13 +2088,19 @@ static const uint32_t L_GCM_gmult_len_r[] = { void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned char* data_p, unsigned long len_p) +#else +void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register unsigned char* x asm ("r0") = (unsigned char*)x_p; - register const 
unsigned char** m asm ("r1") = (const unsigned char**)m_p; - register const unsigned char* data asm ("r2") = (const unsigned char*)data_p; - register unsigned long len asm ("r3") = (unsigned long)len_p; - register uint32_t* L_GCM_gmult_len_r_c asm ("r4") = (uint32_t*)&L_GCM_gmult_len_r; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register unsigned char* x __asm__ ("r0") = (unsigned char*)x_p; + register const unsigned char** m __asm__ ("r1") = (const unsigned char**)m_p; + register const unsigned char* data __asm__ ("r2") = (const unsigned char*)data_p; + register unsigned long len __asm__ ("r3") = (unsigned long)len_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_GCM_gmult_len_r_c __asm__ ("r4") = (uint32_t*)&L_GCM_gmult_len_r; __asm__ __volatile__ ( "MOV lr, %[L_GCM_gmult_len_r]\n\t" @@ -2603,15 +2664,21 @@ void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned static const uint32_t* L_AES_Thumb2_te_gcm = L_AES_Thumb2_te_data; void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) +#else +void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p; - register uint32_t* L_AES_Thumb2_te_gcm_c asm ("r6") = (uint32_t*)L_AES_Thumb2_te_gcm; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; + register unsigned char* ctr __asm__ ("r5") = (unsigned char*)ctr_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_gcm_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_te_gcm; __asm__ __volatile__ ( "MOV r12, r4\n\t" diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c index 2018b8e9d..edb2af068 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c +++ b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c @@ -44,6 +44,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -60,8 +61,14 @@ #if defined(HAVE_CURVE25519) || defined(HAVE_ED25519) #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_init() +#else +void fe_init() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n\t" : @@ -71,8 +78,14 @@ void fe_init() } void fe_add_sub_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_add_sub_op() +#else +void fe_add_sub_op() +#endif /* 
!WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Add-Sub */ "LDRD r4, r5, [r2]\n\t" @@ -168,8 +181,14 @@ void fe_add_sub_op() } void fe_sub_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sub_op() +#else +void fe_sub_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Sub */ "LDM r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" @@ -202,11 +221,17 @@ void fe_sub_op() ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sub(fe r_p, const fe a_p, const fe b_p) +#else +void fe_sub(fe r, const fe a, const fe b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; - register const sword32* b asm ("r2") = (const sword32*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; + register const sword32* b __asm__ ("r2") = (const sword32*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "BL fe_sub_op\n\t" @@ -217,8 +242,14 @@ void fe_sub(fe r_p, const fe a_p, const fe b_p) } void fe_add_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_add_op() +#else +void fe_add_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Add */ "LDM r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" @@ -251,11 +282,17 @@ void fe_add_op() ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_add(fe r_p, const fe a_p, const fe b_p) +#else +void fe_add(fe r, const fe a, const fe b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; - register const sword32* b asm ("r2") = (const sword32*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; + register const sword32* b __asm__ ("r2") = (const sword32*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "BL fe_add_op\n\t" @@ -266,10 +303,16 @@ void fe_add(fe r_p, const fe a_p, const fe b_p) } #ifdef HAVE_ED25519 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_frombytes(fe out_p, const unsigned char* in_p) +#else +void fe_frombytes(fe out, const unsigned char* in) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* out asm ("r0") = (sword32*)out_p; - register const unsigned char* in asm ("r1") = (const unsigned char*)in_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* out __asm__ ("r0") = (sword32*)out_p; + register const unsigned char* in __asm__ ("r1") = (const unsigned char*)in_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR r2, [%[in]]\n\t" @@ -295,10 +338,16 @@ void fe_frombytes(fe out_p, const unsigned char* in_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_tobytes(unsigned char* out_p, const fe n_p) +#else +void fe_tobytes(unsigned char* out, const fe n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register unsigned char* out asm ("r0") = (unsigned char*)out_p; - register const sword32* n asm ("r1") = (const sword32*)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register unsigned char* out __asm__ ("r0") = (unsigned char*)out_p; + register const sword32* n __asm__ ("r1") = (const sword32*)n_p; +#endif /* 
!WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" @@ -335,9 +384,15 @@ void fe_tobytes(unsigned char* out_p, const fe n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_1(fe n_p) +#else +void fe_1(fe n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* n asm ("r0") = (sword32*)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* n __asm__ ("r0") = (sword32*)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Set one */ @@ -356,9 +411,15 @@ void fe_1(fe n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_0(fe n_p) +#else +void fe_0(fe n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* n asm ("r0") = (sword32*)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* n __asm__ ("r0") = (sword32*)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Set zero */ @@ -377,10 +438,16 @@ void fe_0(fe n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_copy(fe r_p, const fe a_p) +#else +void fe_copy(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Copy */ @@ -398,10 +465,16 @@ void fe_copy(fe r_p, const fe a_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_neg(fe r_p, const fe a_p) +#else +void fe_neg(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MVN r7, #0x0\n\t" @@ -425,9 +498,15 @@ void fe_neg(fe r_p, const fe a_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int fe_isnonzero(const fe a_p) +#else +int fe_isnonzero(const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sword32* a asm ("r0") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sword32* a __asm__ ("r0") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" @@ -464,9 +543,15 @@ int fe_isnonzero(const fe a_p) return (uint32_t)(size_t)a; } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int fe_isnegative(const fe a_p) +#else +int fe_isnegative(const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sword32* a asm ("r0") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sword32* a __asm__ ("r0") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r2, r3, r4, r5}\n\t" @@ -492,11 +577,17 @@ int fe_isnegative(const fe a_p) #if defined(HAVE_ED25519_MAKE_KEY) || defined(HAVE_ED25519_SIGN) #ifndef WC_NO_CACHE_RESISTANT +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) +#else +void fe_cmov_table(fe* r, fe* base, signed char b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register fe* r asm ("r0") = (fe*)r_p; - register fe* base asm ("r1") = (fe*)base_p; - register signed char b asm ("r2") = (signed char)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register fe* r __asm__ ("r0") = (fe*)r_p; + register fe* 
base __asm__ ("r1") = (fe*)base_p; + register signed char b __asm__ ("r2") = (signed char)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SXTB %[b], %[b]\n\t" @@ -1469,11 +1560,17 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) +#else +void fe_cmov_table(fe* r, fe* base, signed char b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register fe* r asm ("r0") = (fe*)r_p; - register fe* base asm ("r1") = (fe*)base_p; - register signed char b asm ("r2") = (signed char)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register fe* r __asm__ ("r0") = (fe*)r_p; + register fe* base __asm__ ("r1") = (fe*)base_p; + register signed char b __asm__ ("r2") = (signed char)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SXTB %[b], %[b]\n\t" @@ -1579,8 +1676,14 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) #endif /* HAVE_ED25519 */ #ifdef WOLFSSL_SP_NO_UMAAL void fe_mul_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul_op() +#else +void fe_mul_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x28\n\t" "STR r0, [sp, #36]\n\t" @@ -1959,8 +2062,14 @@ void fe_mul_op() #else void fe_mul_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul_op() +#else +void fe_mul_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" "STRD r0, r1, [sp, #36]\n\t" @@ -2092,11 +2201,17 @@ void fe_mul_op() } #endif /* WOLFSSL_SP_NO_UMAAL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul(fe r_p, const fe a_p, const fe b_p) +#else +void fe_mul(fe r, const fe a, const fe b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; - register const sword32* b asm ("r2") = (const sword32*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; + register const sword32* b __asm__ ("r2") = (const sword32*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "BL fe_mul_op\n\t" @@ -2108,8 +2223,14 @@ void fe_mul(fe r_p, const fe a_p, const fe b_p) #ifdef WOLFSSL_SP_NO_UMAAL void fe_sq_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq_op() +#else +void fe_sq_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" "STR r0, [sp, #64]\n\t" @@ -2381,8 +2502,14 @@ void fe_sq_op() #else void fe_sq_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq_op() +#else +void fe_sq_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x20\n\t" "STR r0, [sp, #28]\n\t" @@ -2500,10 +2627,16 @@ void fe_sq_op() } #endif /* WOLFSSL_SP_NO_UMAAL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq(fe r_p, const fe a_p) +#else +void fe_sq(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") 
= (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "BL fe_sq_op\n\t" @@ -2515,10 +2648,16 @@ void fe_sq(fe r_p, const fe a_p) #ifdef HAVE_CURVE25519 #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul121666(fe r_p, fe a_p) +#else +void fe_mul121666(fe r, fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register sword32* a asm ("r1") = (sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register sword32* a __asm__ ("r1") = (sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Multiply by 121666 */ @@ -2568,10 +2707,16 @@ void fe_mul121666(fe r_p, fe a_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul121666(fe r_p, fe a_p) +#else +void fe_mul121666(fe r, fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register sword32* a asm ("r1") = (sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register sword32* a __asm__ ("r1") = (sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Multiply by 121666 */ @@ -2609,11 +2754,17 @@ void fe_mul121666(fe r_p, fe a_p) #endif /* WOLFSSL_SP_NO_UMAAL */ #ifndef WC_NO_CACHE_RESISTANT +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int curve25519(byte* r_p, const byte* n_p, const byte* a_p) +#else +int curve25519(byte* r, const byte* n, const byte* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* r asm ("r0") = (byte*)r_p; - register const byte* n asm ("r1") = (const byte*)n_p; - register const byte* a asm ("r2") = (const byte*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* r __asm__ ("r0") = (byte*)r_p; + register const byte* n __asm__ ("r1") = (const byte*)n_p; + register const byte* a __asm__ ("r2") = (const byte*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0xbc\n\t" @@ -3039,11 +3190,17 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int curve25519(byte* r_p, const byte* n_p, const byte* a_p) +#else +int curve25519(byte* r, const byte* n, const byte* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* r asm ("r0") = (byte*)r_p; - register const byte* n asm ("r1") = (const byte*)n_p; - register const byte* a asm ("r2") = (const byte*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* r __asm__ ("r0") = (byte*)r_p; + register const byte* n __asm__ ("r1") = (const byte*)n_p; + register const byte* a __asm__ ("r2") = (const byte*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0xc0\n\t" @@ -3387,10 +3544,16 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) #endif /* WC_NO_CACHE_RESISTANT */ #endif /* HAVE_CURVE25519 */ #ifdef HAVE_ED25519 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_invert(fe r_p, const fe a_p) +#else +void fe_invert(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x88\n\t" @@ -3590,10 +3753,16 @@ void fe_invert(fe r_p, const fe a_p) } #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG 
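A note for reading the paired implementations here and below: each multi-precision primitive in these files comes in two variants selected by WOLFSSL_SP_NO_UMAAL, because UMAAL belongs to the ARMv7E-M DSP extension and is missing on cores such as the Cortex-M3, whose builds fall back to UMULL/UMLAL sequences. UMAAL rdlo, rdhi, rn, rm computes rn*rm + rdlo + rdhi into rdhi:rdlo, and since (2^32-1)^2 + 2*(2^32-1) = 2^64-1 the sum can never overflow 64 bits, which is what makes it so convenient for the inner loops of these multiplies and squarings. A C model of the instruction, offered purely as a reading aid (not wolfSSL code):

#include <stdint.h>

/* Model of Thumb-2 UMAAL rdlo, rdhi, rn, rm:
 * rdhi:rdlo = rn * rm + rdlo + rdhi.
 * Worst case (2^32-1)^2 + 2*(2^32-1) == 2^64 - 1, so no overflow. */
static void umaal(uint32_t* rdlo, uint32_t* rdhi, uint32_t rn, uint32_t rm)
{
    uint64_t t = (uint64_t)rn * rm + *rdlo + *rdhi;
    *rdlo = (uint32_t)t;
    *rdhi = (uint32_t)(t >> 32);
}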
void fe_sq2(fe r_p, const fe a_p) +#else +void fe_sq2(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -3898,10 +4067,16 @@ void fe_sq2(fe r_p, const fe a_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq2(fe r_p, const fe a_p) +#else +void fe_sq2(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x24\n\t" @@ -4055,10 +4230,16 @@ void fe_sq2(fe r_p, const fe a_p) } #endif /* WOLFSSL_SP_NO_UMAAL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_pow22523(fe r_p, const fe a_p) +#else +void fe_pow22523(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x68\n\t" @@ -4257,10 +4438,16 @@ void fe_pow22523(fe r_p, const fe a_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p) +#else +void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p2 * r asm ("r0") = (ge_p2 *)r_p; - register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p2 * r __asm__ ("r0") = (ge_p2 *)r_p; + register const ge_p1p1 * p __asm__ ("r1") = (const ge_p1p1 *)p_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x8\n\t" @@ -4287,10 +4474,16 @@ void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p) +#else +void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p3 * r asm ("r0") = (ge_p3 *)r_p; - register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p3 * r __asm__ ("r0") = (ge_p3 *)r_p; + register const ge_p1p1 * p __asm__ ("r1") = (const ge_p1p1 *)p_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x8\n\t" @@ -4322,10 +4515,16 @@ void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p) +#else +void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p2 * p asm ("r1") = (const ge_p2 *)p_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p2 * p __asm__ ("r1") = (const ge_p2 *)p_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x8\n\t" @@ -4369,11 +4568,17 @@ void ge_p2_dbl(ge_p1p1 * r_p, 
const ge_p2 * p_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) +#else +void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_precomp * q __asm__ ("r2") = (const ge_precomp *)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0xc\n\t" @@ -4451,11 +4656,17 @@ void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) +#else +void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_precomp * q __asm__ ("r2") = (const ge_precomp *)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0xc\n\t" @@ -4534,11 +4745,17 @@ void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) +#else +void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_cached* q asm ("r2") = (const ge_cached*)q_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_cached* q __asm__ ("r2") = (const ge_cached*)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" @@ -4617,11 +4834,17 @@ void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) +#else +void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_cached* q asm ("r2") = (const ge_cached*)q_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_cached* q __asm__ ("r2") = (const ge_cached*)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" @@ -4701,9 +4924,15 @@ void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) } #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_reduce(byte* s_p) +#else +void sc_reduce(byte* s) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* s asm ("r0") = (byte*)s_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* s __asm__ ("r0") = 
(byte*)s_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x38\n\t" @@ -5129,9 +5358,15 @@ void sc_reduce(byte* s_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_reduce(byte* s_p) +#else +void sc_reduce(byte* s) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* s asm ("r0") = (byte*)s_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* s __asm__ ("r0") = (byte*)s_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x38\n\t" @@ -5430,12 +5665,18 @@ void sc_reduce(byte* s_p) #endif /* WOLFSSL_SP_NO_UMAAL */ #ifdef HAVE_ED25519_SIGN #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) +#else +void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* s asm ("r0") = (byte*)s_p; - register const byte* a asm ("r1") = (const byte*)a_p; - register const byte* b asm ("r2") = (const byte*)b_p; - register const byte* c asm ("r3") = (const byte*)c_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* s __asm__ ("r0") = (byte*)s_p; + register const byte* a __asm__ ("r1") = (const byte*)a_p; + register const byte* b __asm__ ("r2") = (const byte*)b_p; + register const byte* c __asm__ ("r3") = (const byte*)c_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x50\n\t" @@ -6218,12 +6459,18 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) +#else +void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* s asm ("r0") = (byte*)s_p; - register const byte* a asm ("r1") = (const byte*)a_p; - register const byte* b asm ("r2") = (const byte*)b_p; - register const byte* c asm ("r3") = (const byte*)c_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* s __asm__ ("r0") = (byte*)s_p; + register const byte* a __asm__ ("r1") = (const byte*)a_p; + register const byte* b __asm__ ("r2") = (const byte*)b_p; + register const byte* c __asm__ ("r3") = (const byte*)c_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x50\n\t" diff --git a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c index 43659fb07..2483f036d 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c @@ -44,6 +44,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -73,12 +74,18 @@ static const uint32_t L_SHA256_transform_len_k[] = { }; void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) +#else +void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p; - register const byte* data asm ("r1") = (const byte*)data_p; - register word32 len asm ("r2") = (word32)len_p; - register uint32_t* L_SHA256_transform_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_len_k; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register wc_Sha256* sha256 __asm__ ("r0") = 
(wc_Sha256*)sha256_p; + register const byte* data __asm__ ("r1") = (const byte*)data_p; + register word32 len __asm__ ("r2") = (word32)len_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_SHA256_transform_len_k_c __asm__ ("r3") = (uint32_t*)&L_SHA256_transform_len_k; __asm__ __volatile__ ( "SUB sp, sp, #0xc0\n\t" diff --git a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c index d62a035cc..3dc2d1f20 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c @@ -44,6 +44,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -97,12 +98,18 @@ static const uint64_t L_SHA512_transform_len_k[] = { }; void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) +#else +void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p; - register const byte* data asm ("r1") = (const byte*)data_p; - register word32 len asm ("r2") = (word32)len_p; - register uint64_t* L_SHA512_transform_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_len_k; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register wc_Sha512* sha512 __asm__ ("r0") = (wc_Sha512*)sha512_p; + register const byte* data __asm__ ("r1") = (const byte*)data_p; + register word32 len __asm__ ("r2") = (word32)len_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint64_t* L_SHA512_transform_len_k_c __asm__ ("r3") = (uint64_t*)&L_SHA512_transform_len_k; __asm__ __volatile__ ( "SUB sp, sp, #0xc0\n\t" diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index 95c7820d7..a44fcfe51 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -240,17 +240,26 @@ static void sp_2048_to_bin_64(sp_digit* r, byte* a) #ifndef WOLFSSL_SP_SMALL #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x24\n\t" @@ -593,115 +602,129 @@ static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "STRD %[r], %[a], [sp, #36]\n\t" +#else + "STR %[r], [sp, #36]\n\t" + "STR %[a], [sp, #40]\n\t" +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[b]\n\t" - "LDM %[a], {%[r], %[a], %[b], r3}\n\t" + "LDM %[a], {r0, r1, r2, r3}\n\t" "LDM lr!, {r4, r5, r6}\n\t" - "UMULL r10, r11, %[r], r4\n\t" - "UMULL r12, r7, %[a], r4\n\t" - "UMAAL r11, r12, %[r], r5\n\t" - "UMULL r8, r9, %[b], r4\n\t" - "UMAAL r12, r8, %[a], r5\n\t" - "UMAAL r12, r7, %[r], r6\n\t" + "UMULL r10, r11, r0, r4\n\t" + "UMULL r12, r7, r1, r4\n\t" + "UMAAL r11, r12, r0, r5\n\t" + "UMULL r8, r9, r2, r4\n\t" + "UMAAL r12, r8, r1, r5\n\t" + "UMAAL r12, r7, r0, r6\n\t" "UMAAL r8, r9, r3, r4\n\t" "STM sp, {r10, r11, r12}\n\t" - "UMAAL r7, r8, %[b], r5\n\t" + "UMAAL r7, r8, r2, r5\n\t" "LDM lr!, {r4}\n\t" - "UMULL r10, r11, %[a], r6\n\t" - "UMAAL r8, r9, %[b], r6\n\t" - "UMAAL r7, r10, %[r], r4\n\t" + "UMULL r10, r11, r1, r6\n\t" + "UMAAL r8, r9, r2, r6\n\t" + "UMAAL r7, r10, r0, r4\n\t" "UMAAL r8, r11, r3, r5\n\t" "STR r7, [sp, #12]\n\t" - "UMAAL r8, r10, %[a], r4\n\t" + "UMAAL r8, r10, r1, r4\n\t" "UMAAL r9, r11, r3, r6\n\t" - "UMAAL r9, r10, %[b], r4\n\t" + "UMAAL r9, r10, r2, r4\n\t" "UMAAL r10, r11, r3, r4\n\t" "LDM lr, {r4, r5, r6, r7}\n\t" "MOV r12, #0x0\n\t" - "UMLAL r8, r12, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r4\n\t" - "UMAAL r10, r12, %[b], r4\n\t" + "UMLAL r8, r12, r0, r4\n\t" + "UMAAL r9, r12, r1, r4\n\t" + "UMAAL r10, r12, r2, r4\n\t" "UMAAL r11, r12, r3, r4\n\t" "MOV r4, #0x0\n\t" - "UMLAL r9, r4, %[r], r5\n\t" - "UMAAL r10, r4, %[a], r5\n\t" - "UMAAL r11, r4, %[b], r5\n\t" + "UMLAL r9, r4, r0, r5\n\t" + "UMAAL r10, r4, r1, r5\n\t" + "UMAAL r11, r4, r2, r5\n\t" "UMAAL r12, r4, r3, r5\n\t" "MOV r5, #0x0\n\t" - "UMLAL r10, r5, %[r], r6\n\t" - "UMAAL r11, r5, %[a], r6\n\t" - "UMAAL r12, r5, %[b], r6\n\t" + "UMLAL r10, r5, r0, r6\n\t" + "UMAAL r11, r5, r1, r6\n\t" + "UMAAL r12, r5, r2, r6\n\t" "UMAAL r4, r5, r3, r6\n\t" "MOV r6, #0x0\n\t" - "UMLAL r11, r6, %[r], r7\n\t" - "LDR %[r], [sp, #40]\n\t" - "UMAAL r12, r6, %[a], r7\n\t" - "ADD %[r], %[r], #0x10\n\t" - "UMAAL r4, r6, %[b], r7\n\t" + "UMLAL r11, r6, r0, r7\n\t" + "LDR r0, [sp, #40]\n\t" + "UMAAL r12, r6, r1, r7\n\t" + "ADD r0, r0, #0x10\n\t" + "UMAAL r4, r6, r2, r7\n\t" "SUB lr, lr, #0x10\n\t" "UMAAL r5, r6, r3, r7\n\t" - "LDM %[r], {%[r], %[a], %[b], r3}\n\t" + "LDM r0, {r0, r1, r2, r3}\n\t" "STR r6, [sp, #32]\n\t" "LDM lr!, {r6}\n\t" "MOV r7, #0x0\n\t" - "UMLAL r8, r7, %[r], r6\n\t" - "UMAAL r9, r7, %[a], r6\n\t" + "UMLAL r8, r7, r0, r6\n\t" + "UMAAL r9, r7, r1, r6\n\t" "STR r8, [sp, #16]\n\t" - "UMAAL r10, r7, %[b], r6\n\t" + "UMAAL r10, r7, r2, r6\n\t" "UMAAL r11, r7, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r8, #0x0\n\t" - "UMLAL r9, r8, %[r], r6\n\t" - "UMAAL r10, r8, %[a], r6\n\t" + "UMLAL r9, r8, 
r0, r6\n\t" + "UMAAL r10, r8, r1, r6\n\t" "STR r9, [sp, #20]\n\t" - "UMAAL r11, r8, %[b], r6\n\t" + "UMAAL r11, r8, r2, r6\n\t" "UMAAL r12, r8, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r9, #0x0\n\t" - "UMLAL r10, r9, %[r], r6\n\t" - "UMAAL r11, r9, %[a], r6\n\t" + "UMLAL r10, r9, r0, r6\n\t" + "UMAAL r11, r9, r1, r6\n\t" "STR r10, [sp, #24]\n\t" - "UMAAL r12, r9, %[b], r6\n\t" + "UMAAL r12, r9, r2, r6\n\t" "UMAAL r4, r9, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r10, #0x0\n\t" - "UMLAL r11, r10, %[r], r6\n\t" - "UMAAL r12, r10, %[a], r6\n\t" + "UMLAL r11, r10, r0, r6\n\t" + "UMAAL r12, r10, r1, r6\n\t" "STR r11, [sp, #28]\n\t" - "UMAAL r4, r10, %[b], r6\n\t" + "UMAAL r4, r10, r2, r6\n\t" "UMAAL r5, r10, r3, r6\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r12, r7, %[r], r11\n\t" - "UMAAL r4, r7, %[a], r11\n\t" + "UMAAL r12, r7, r0, r11\n\t" + "UMAAL r4, r7, r1, r11\n\t" "LDR r6, [sp, #32]\n\t" - "UMAAL r5, r7, %[b], r11\n\t" + "UMAAL r5, r7, r2, r11\n\t" "UMAAL r6, r7, r3, r11\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r4, r8, %[r], r11\n\t" - "UMAAL r5, r8, %[a], r11\n\t" - "UMAAL r6, r8, %[b], r11\n\t" + "UMAAL r4, r8, r0, r11\n\t" + "UMAAL r5, r8, r1, r11\n\t" + "UMAAL r6, r8, r2, r11\n\t" "UMAAL r7, r8, r3, r11\n\t" "LDM lr, {r11, lr}\n\t" - "UMAAL r5, r9, %[r], r11\n\t" - "UMAAL r6, r10, %[r], lr\n\t" - "UMAAL r6, r9, %[a], r11\n\t" - "UMAAL r7, r10, %[a], lr\n\t" - "UMAAL r7, r9, %[b], r11\n\t" - "UMAAL r8, r10, %[b], lr\n\t" + "UMAAL r5, r9, r0, r11\n\t" + "UMAAL r6, r10, r0, lr\n\t" + "UMAAL r6, r9, r1, r11\n\t" + "UMAAL r7, r10, r1, lr\n\t" + "UMAAL r7, r9, r2, r11\n\t" + "UMAAL r8, r10, r2, lr\n\t" "UMAAL r8, r9, r3, r11\n\t" "UMAAL r9, r10, r3, lr\n\t" "MOV r3, r12\n\t" @@ -725,11 +748,17 @@ static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -760,10 +789,16 @@ static sp_digit sp_2048_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_16(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -808,11 +843,17 @@ static sp_digit sp_2048_sub_in_place_16(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. 
* b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -922,10 +963,16 @@ SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -998,11 +1045,17 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -1144,10 +1197,16 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -1276,11 +1335,17 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -1474,15 +1539,24 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, } #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -1717,62 +1791,71 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x20\n\t" "STR %[r], [sp, #28]\n\t" - "LDM %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" - "UMULL r9, r10, %[r], %[r]\n\t" - "UMULL r11, r12, %[r], %[a]\n\t" + "LDM %[a], {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" + "UMULL r9, r10, r0, r0\n\t" + "UMULL r11, r12, r0, r1\n\t" "ADDS r11, r11, r11\n\t" "MOV lr, #0x0\n\t" "UMAAL r10, r11, lr, lr\n\t" "STM sp, {r9, r10}\n\t" "MOV r8, lr\n\t" - "UMAAL r8, r12, %[r], r2\n\t" + "UMAAL r8, r12, r0, r2\n\t" "ADCS r8, r8, r8\n\t" - "UMAAL r8, r11, %[a], %[a]\n\t" - "UMULL r9, r10, %[r], r3\n\t" - "UMAAL r9, r12, %[a], r2\n\t" + "UMAAL r8, r11, r1, r1\n\t" + "UMULL r9, r10, r0, r3\n\t" + "UMAAL r9, r12, r1, r2\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STRD r8, r9, [sp, #8]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r10, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r3\n\t" + "UMAAL r9, r10, r0, r4\n\t" + "UMAAL r9, r12, r1, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r2, r2\n\t" "STR r9, [sp, #16]\n\t" - "UMULL r9, r8, %[r], r5\n\t" - "UMAAL r9, r12, %[a], r4\n\t" + "UMULL r9, r8, r0, r5\n\t" + "UMAAL r9, r12, r1, r4\n\t" "UMAAL r9, r10, r2, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STR r9, [sp, #20]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r8, %[r], r6\n\t" - "UMAAL r9, r12, %[a], r5\n\t" + "UMAAL r9, r8, r0, r6\n\t" + "UMAAL r9, r12, r1, r5\n\t" "UMAAL r9, r10, r2, r4\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r3, r3\n\t" "STR r9, [sp, #24]\n\t" - "UMULL %[r], r9, %[r], r7\n\t" - "UMAAL %[r], r8, %[a], r6\n\t" - "UMAAL %[r], r12, r2, r5\n\t" - "UMAAL %[r], r10, r3, r4\n\t" - "ADCS %[r], %[r], %[r]\n\t" - "UMAAL %[r], r11, lr, lr\n\t" + "UMULL r0, r9, r0, r7\n\t" + "UMAAL r0, r8, r1, r6\n\t" + "UMAAL r0, r12, r2, r5\n\t" + "UMAAL r0, r10, r3, r4\n\t" + "ADCS r0, r0, r0\n\t" + "UMAAL r0, r11, lr, lr\n\t" /* R[7] = r0 */ - "UMAAL r9, r8, %[a], r7\n\t" + "UMAAL r9, r8, r1, r7\n\t" "UMAAL r9, r10, r2, r6\n\t" "UMAAL r12, r9, r3, r5\n\t" "ADCS r12, r12, r12\n\t" @@ -1790,10 +1873,10 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) "ADCS r3, r2, r2\n\t" "UMAAL r10, r3, r5, r5\n\t" /* R[10] = r10 */ - "MOV %[a], lr\n\t" - "UMAAL %[a], r8, r4, r7\n\t" - "UMAAL %[a], r9, r5, r6\n\t" - "ADCS r4, %[a], %[a]\n\t" + "MOV r1, lr\n\t" + "UMAAL r1, r8, r4, r7\n\t" + "UMAAL r1, r9, r5, r6\n\t" + "ADCS r4, r1, r1\n\t" "UMAAL r3, r4, lr, lr\n\t" /* R[11] = r3 */ "UMAAL r8, r9, r5, r7\n\t" @@ -1812,14 +1895,14 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) /* R[15] = r7 */ "LDR lr, [sp, #28]\n\t" "ADD lr, lr, #0x1c\n\t" - "STM lr!, {%[r], r12}\n\t" + "STM lr!, {r0, r12}\n\t" "STM lr!, {r11}\n\t" "STM lr!, {r10}\n\t" "STM lr!, {r3, r4, r8, r9}\n\t" "STM lr!, {r7}\n\t" "SUB lr, lr, #0x40\n\t" - "LDM sp, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" - "STM lr, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" + "LDM sp, {r0, r1, r2, r3, r4, r5, r6}\n\t" + "STM lr, {r0, r1, r2, r3, r4, r5, r6}\n\t" "ADD sp, sp, #0x20\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -1834,11 +1917,17 @@ static void 
sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -1905,11 +1994,17 @@ SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -1990,11 +2085,17 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -2105,18 +2206,24 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
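 *
 * The looping adders below park the carry in a register between
 * iterations: "ADDS r3, r3, #0xffffffff" adds all-ones to the saved
 * carry (0 or 1), which wraps exactly when the carry was 1 and so
 * restores the C flag before the next ADCS block. The immediate is now
 * written as 0xffffffff because the old "#0x-1" spelling is rejected by
 * some assemblers (IAR among them).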
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x100\n\t" "\n" "L_sp_2048_add_64_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -2147,10 +2254,16 @@ static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -2188,11 +2301,17 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x200\n\t" @@ -2263,10 +2382,16 @@ static void sp_2048_mul_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_sqr_64(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x200\n\t" @@ -2383,18 +2508,24 @@ static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x80\n\t" "\n" "L_sp_2048_add_32_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -2425,10 +2556,16 @@ static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -2466,11 +2603,17 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" @@ -2541,10 +2684,16 @@ static void sp_2048_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_sqr_32(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" @@ -2665,11 +2814,17 @@ static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho) * a A single precision integer. * b A single precision digit. 
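 *
 * Multiplying a 64-limb value by one digit is a single pass of
 * multiply-accumulate steps. A portable C model (hedged: it assumes a
 * 64-bit sp_uint64 accumulator and that the final carry is stored as a
 * 65th word, matching the extra store at the end of the assembly):
 *
 *   sp_uint64 t = 0;
 *   for (i = 0; i < 64; i++) {
 *       t += (sp_uint64)a[i] * b;
 *       r[i] = (sp_digit)t;   /* low word of the running product */
 *       t >>= 32;             /* high word carries into the next limb */
 *   }
 *   r[64] = (sp_digit)t;      /* final carry word */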
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -2712,11 +2867,17 @@ static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -3070,12 +3231,18 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -3114,12 +3281,18 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. 
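 *
 * m is all ones or all zero, so the subtrahend is masked rather than
 * branched on and the routine runs in constant time regardless of the
 * secret condition. A hedged C model of the loop:
 *
 *   sp_digit c = 0;                      /* borrow: 0 or 1 */
 *   for (i = 0; i < 32; i++) {
 *       sp_uint64 t = (sp_uint64)a[i] - (b[i] & m) - c;
 *       r[i] = (sp_digit)t;
 *       c = (sp_digit)(t >> 32) & 1;     /* 1 iff the subtract wrapped */
 *   }
 *   /* the accumulated borrow is the return value */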
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -3245,17 +3418,24 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp #endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_SP_SMALL /* Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -3550,11 +3730,125 @@ static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
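 *
 * Word-level Montgomery reduction, in outline (hedged; mp is the
 * negated inverse -1/m mod 2^32 and a holds 64 limbs on entry):
 *
 *   for (i = 0; i < 32; i++) {
 *       mu = a[i] * mp;          /* picked so limb i cancels to zero */
 *       a[i..i+31] += mu * m;    /* one multiply-accumulate per limb */
 *       /* fold the final carry into a[i+32], remembering overflow */
 *   }
 *
 * On exit the asm leaves the accumulated overflow in mp, and the
 * trailing sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp) performs
 * the final conditional subtraction: a was advanced 32 limbs inside the
 * loop, so a - 32 is the original base where the reduced result lands.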
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_32_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_32_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x80\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_32_mul_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_32_mul_%=\n\t" +#endif + "LDR r10, [%[a], #128]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #128]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x80\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_32_word_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_32_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#else +#ifndef WOLFSSL_SP_SMALL +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
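+ *
+ * This branch can use UMAAL: "UMAAL rLo, rHi, rN, rM" computes
+ * rHi:rLo = rN * rM + rLo + rHi. Because (2^32-1)^2 + 2*(2^32-1) is
+ * exactly 2^64 - 1, the multiply absorbs two 32-bit addends without
+ * overflow, which removes the separate MOV/ADDS/ADC carry fix-up the
+ * UMLAL path above needs.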
+ */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -3752,6 +4046,101 @@ static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); } +#else +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_32_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_32_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x80\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_32_mul_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_32_mul_%=\n\t" +#endif + "LDR r10, [%[a], #128]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #128]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x80\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_32_word_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_32_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ #endif /* Multiply two Montgomery form 
numbers mod the modulus (prime). * (r = a * b mod m) @@ -3790,11 +4179,17 @@ SP_NOINLINE static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -3837,11 +4232,17 @@ static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -4021,11 +4422,17 @@ static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -4080,11 +4487,17 @@ static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
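 *
 * "Approximate" here means the quotient estimate can overshoot the true
 * value by at most one; callers multiply the estimate back, compare
 * against the dividend and correct. Both variants build the estimate
 * from a truncated divisor (the top 16 bits in one, div/2 in the
 * other), which is what introduces the possible off-by-one.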
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -4144,16 +4557,22 @@ static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_2048_cmp_32(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x7c\n\t" "\n" @@ -4940,12 +5359,18 @@ static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -4984,12 +5409,18 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -5227,17 +5658,24 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp #endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_SP_SMALL /* Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -5788,11 +6226,125 @@ static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_64_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_64_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x100\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_64_mul_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_64_mul_%=\n\t" +#endif + "LDR r10, [%[a], #256]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #256]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x100\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_64_word_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_64_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#else +#ifndef WOLFSSL_SP_SMALL +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -6150,6 +6702,101 @@ static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); } +#else +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_64_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_64_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x100\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_64_mul_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_64_mul_%=\n\t" +#endif + "LDR r10, [%[a], #256]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #256]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x100\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_64_word_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_64_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ #endif /* Multiply two Montgomery 
form numbers mod the modulus (prime). * (r = a * b mod m) @@ -6188,11 +6835,17 @@ SP_NOINLINE static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -6229,11 +6882,17 @@ static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -6367,11 +7026,17 @@ static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digi * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -6426,11 +7091,17 @@ static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -6593,16 +7264,22 @@ static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_2048_cmp_64(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0xfc\n\t" "\n" @@ -7864,12 +8541,18 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -7877,7 +8560,7 @@ static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp "MOV r4, #0x0\n\t" "\n" "L_sp_2048_cond_add_32_words_%=:\n\t" - "ADDS r5, r5, #0x-1\n\t" + "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" "AND r7, r7, %[m]\n\t" @@ -7908,12 +8591,18 @@ static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. 
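 *
 * Branch-free mirror of the conditional subtract: b is masked with m
 * (all ones or zero) before the ADCS chain, used e.g. to add the
 * modulus back when the CRT recombination step in RSA goes negative. A
 * hedged C model:
 *
 *   sp_digit c = 0;                      /* carry: 0 or 1 */
 *   for (i = 0; i < 32; i++) {
 *       sp_uint64 t = (sp_uint64)a[i] + (b[i] & m) + c;
 *       r[i] = (sp_digit)t;
 *       c = (sp_digit)(t >> 32);         /* carry into the next limb */
 *   }
 *   /* the final carry is the return value */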
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -8352,11 +9041,17 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_2048 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_lshift_64(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_2048_lshift_64(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x1f\n\t" @@ -9166,11 +9861,17 @@ static void sp_3072_to_bin_96(sp_digit* r, byte* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x30\n\t" @@ -10187,11 +10888,17 @@ static void sp_3072_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * a A single precision integer. * b A single precision integer. 
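 *
 * From here the sp_3072_* helpers repeat the same primitives at 12-,
 * 24-, 48- and 96-limb sizes: only the loop bounds, unroll counts and
 * stack offsets change, and every register-bound parameter receives the
 * same WOLFSSL_NO_VAR_ASSIGN_REG guard treatment as the 2048-bit code
 * above.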
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -10229,10 +10936,16 @@ static sp_digit sp_3072_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_in_place_24(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -10291,11 +11004,17 @@ static sp_digit sp_3072_sub_in_place_24(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_24(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -10423,10 +11142,16 @@ SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -10527,11 +11252,17 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -10701,10 +11432,16 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -10889,11 +11626,17 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -11147,10 +11890,16 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_sqr_12(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x30\n\t" @@ -11842,11 +12591,17 @@ static void sp_3072_sqr_12(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -11920,11 +12675,17 @@ SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_24(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_24(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -12019,11 +12780,17 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -12162,18 +12929,24 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
@@ -12162,18 +12929,24 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
  * a A single precision integer.
  * b A single precision integer.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p)
+#else
+static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "MOV r3, #0x0\n\t"
         "ADD r12, %[a], #0x180\n\t"
         "\n"
     "L_sp_3072_add_96_word_%=:\n\t"
-        "ADDS r3, r3, #0x-1\n\t"
+        "ADDS r3, r3, #0xffffffff\n\t"
         "LDM %[a]!, {r4, r5, r6, r7}\n\t"
         "LDM %[b]!, {r8, r9, r10, r11}\n\t"
         "ADCS r4, r4, r8\n\t"
@@ -12204,10 +12977,16 @@ static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digi
  * a A single precision integer.
  * b A single precision integer.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p)
+#else
+static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* a asm ("r0") = (sp_digit*)a_p;
-    register const sp_digit* b asm ("r1") = (const sp_digit*)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "MOV r10, #0x0\n\t"
@@ -12245,11 +13024,17 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p)
  * a A single precision integer.
  * b A single precision integer.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_3072_mul_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p)
+#else
+static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "SUB sp, sp, #0x300\n\t"
@@ -12320,10 +13105,16 @@ static void sp_3072_mul_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b
  * r A single precision integer.
  * a A single precision integer.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_3072_sqr_96(sp_digit* r_p, const sp_digit* a_p)
+#else
+static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "SUB sp, sp, #0x300\n\t"
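[Note] The looped add variants above cannot keep the carry in the flags, because the loop-control compare clobbers them; the carry lives in r3 between iterations and is pushed back into the carry flag at the top of each pass. That is what the corrected `ADDS r3, r3, #0xffffffff` does: 0 + 0xffffffff leaves the carry flag clear, while 1 + 0xffffffff overflows and sets it. The old `#0x-1` spelling is not a portable immediate, hence the change. One loop iteration, in C terms (sketch):

    /* carry holds 0 or 1 (register r3); the asm handles four words per pass */
    for (int i = 0; i < 96; i++) {
        uint64_t s = (uint64_t)a[i] + b[i] + carry;
        r[i] = (sp_digit)s;
        carry = (sp_digit)(s >> 32);
    }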
@@ -12440,18 +13231,24 @@ static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
  * a A single precision integer.
  * b A single precision integer.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p)
+#else
+static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "MOV r3, #0x0\n\t"
         "ADD r12, %[a], #0xc0\n\t"
         "\n"
     "L_sp_3072_add_48_word_%=:\n\t"
-        "ADDS r3, r3, #0x-1\n\t"
+        "ADDS r3, r3, #0xffffffff\n\t"
         "LDM %[a]!, {r4, r5, r6, r7}\n\t"
         "LDM %[b]!, {r8, r9, r10, r11}\n\t"
         "ADCS r4, r4, r8\n\t"
@@ -12482,10 +13279,16 @@ static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digi
  * a A single precision integer.
  * b A single precision integer.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p)
+#else
+static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* a asm ("r0") = (sp_digit*)a_p;
-    register const sp_digit* b asm ("r1") = (const sp_digit*)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "MOV r10, #0x0\n\t"
@@ -12523,11 +13326,17 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p)
  * a A single precision integer.
  * b A single precision integer.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_3072_mul_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p)
+#else
+static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "SUB sp, sp, #0x180\n\t"
@@ -12598,10 +13407,16 @@ static void sp_3072_mul_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b
  * r A single precision integer.
  * a A single precision integer.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_3072_sqr_48(sp_digit* r_p, const sp_digit* a_p)
+#else
+static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "SUB sp, sp, #0x180\n\t"
@@ -12722,11 +13537,17 @@ static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
  * a A single precision integer.
  * b A single precision digit.
 */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p)
+#else
+static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register sp_digit b asm ("r2") = (sp_digit)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register sp_digit b __asm__ ("r2") = (sp_digit)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         /* A[0] * B */
@@ -12769,11 +13590,17 @@ static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p)
  * a A single precision integer.
  * b A single precision digit.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p)
+#else
+static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register sp_digit b asm ("r2") = (sp_digit)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register sp_digit b __asm__ ("r2") = (sp_digit)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         /* A[0] * B */
@@ -13287,12 +14114,18 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
  * b A single precision number to subtract.
  * m Mask value to apply.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p)
+#else
+static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
-    register sp_digit m asm ("r3") = (sp_digit)m_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+    register sp_digit m __asm__ ("r3") = (sp_digit)m_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "MOV r8, #0x0\n\t"
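[Note] sp_3072_mul_d_96 multiplies a 96-word integer by a single 32-bit digit; each `/* A[i] * B */` step in the body is a multiply-accumulate with carry propagation. Functionally (sketch; the helper name is mine):

    /* r = a * b where b is one digit; the asm also emits the top carry word. */
    static void model_mul_d_n(sp_digit* r, const sp_digit* a, sp_digit b, int n)
    {
        uint64_t t = 0;
        for (int i = 0; i < n; i++) {
            t += (uint64_t)a[i] * b;
            r[i] = (sp_digit)t;
            t >>= 32;
        }
        r[n] = (sp_digit)t;   /* top word of the (n+1)-word product */
    }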
@@ -13331,12 +14164,18 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp
  * b A single precision number to subtract.
  * m Mask value to apply.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p)
+#else
+static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
-    register sp_digit m asm ("r3") = (sp_digit)m_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+    register sp_digit m __asm__ ("r3") = (sp_digit)m_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "MOV r5, #0x0\n\t"
@@ -13518,17 +14357,24 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp
 #endif /* WOLFSSL_SP_SMALL */
 
 #ifdef WOLFSSL_SP_NO_UMAAL
+#ifndef WOLFSSL_SP_SMALL
 /* Reduce the number back to 3072 bits using Montgomery reduction.
  *
  * a A single precision number to reduce in place.
  * m The single precision number representing the modulus.
  * mp The digit representing the negative inverse of m mod 2^n.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p)
+#else
+static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* a asm ("r0") = (sp_digit*)a_p;
-    register const sp_digit* m asm ("r1") = (const sp_digit*)m_p;
-    register sp_digit mp asm ("r2") = (sp_digit)mp_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p;
+    register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "LDR lr, [%[m]]\n\t"
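[Note] The cond_sub helpers subtract b from a only when the mask m is all ones, but they execute the same instruction sequence either way, so the decision is not observable through timing: each word of b is ANDed with m (0 or 0xffffffff) before the subtract. Model (names mine):

    /* r = a - (b & m) over n words; m is 0 or 0xffffffff. */
    static sp_digit model_cond_sub_n(sp_digit* r, const sp_digit* a,
                                     const sp_digit* b, sp_digit m, int n)
    {
        sp_digit borrow = 0;
        for (int i = 0; i < n; i++) {
            uint64_t d = (uint64_t)a[i] - (b[i] & m) - borrow;
            r[i] = (sp_digit)d;
            borrow = (sp_digit)((d >> 32) & 1);
        }
        return borrow;   /* the asm may report this as 0 / 0xffffffff */
    }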
@@ -13951,11 +14797,125 @@ static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit
  * m The single precision number representing the modulus.
  * mp The digit representing the negative inverse of m mod 2^n.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p)
+#else
+static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* a asm ("r0") = (sp_digit*)a_p;
-    register const sp_digit* m asm ("r1") = (const sp_digit*)m_p;
-    register sp_digit mp asm ("r2") = (sp_digit)mp_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p;
+    register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
+
+    __asm__ __volatile__ (
+        "LDR r11, [%[m]]\n\t"
+        /* i = 0 */
+        "MOV r9, #0x0\n\t"
+        /* ca = 0 */
+        "MOV r3, #0x0\n\t"
+        "\n"
+    "L_sp_3072_mont_reduce_48_word_%=:\n\t"
+        /* mu = a[i] * mp */
+        "LDR r10, [%[a]]\n\t"
+        "MUL r8, %[mp], r10\n\t"
+        /* j = 0 */
+        "MOV r12, #0x0\n\t"
+        "MOV r4, #0x0\n\t"
+        "\n"
+    "L_sp_3072_mont_reduce_48_mul_%=:\n\t"
+        /* a[i+j+0] += m[j+0] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "MOV r5, #0x0\n\t"
+        "UMLAL r10, r5, r8, r7\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        "ADC r4, r5, #0x0\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "MOV r5, #0x0\n\t"
+        "UMLAL r10, r5, r8, r7\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        "ADC r4, r5, #0x0\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+2] += m[j+2] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "MOV r5, #0x0\n\t"
+        "UMLAL r10, r5, r8, r7\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        "ADC r4, r5, #0x0\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+3] += m[j+3] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "MOV r5, #0x0\n\t"
+        "UMLAL r10, r5, r8, r7\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        "ADC r4, r5, #0x0\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        "CMP r12, #0xc0\n\t"
+#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
+        "BLT L_sp_3072_mont_reduce_48_mul_%=\n\t"
+#else
+        "BLT.N L_sp_3072_mont_reduce_48_mul_%=\n\t"
+#endif
+        "LDR r10, [%[a], #192]\n\t"
+        "ADDS r4, r4, r3\n\t"
+        "MOV r3, #0x0\n\t"
+        "ADC r3, r3, #0x0\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "ADC r3, r3, r3\n\t"
+        "STR r10, [%[a], #192]\n\t"
+        /* i += 1 */
+        "ADD r9, r9, #0x4\n\t"
+        "ADD %[a], %[a], #0x4\n\t"
+        "CMP r9, #0xc0\n\t"
+#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
+        "BLT L_sp_3072_mont_reduce_48_word_%=\n\t"
+#else
+        "BLT.N L_sp_3072_mont_reduce_48_word_%=\n\t"
+#endif
+        /* Loop Done */
+        "MOV %[mp], r3\n\t"
+        : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+    sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
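[Note] The UMLAL block above is the classic word-serial Montgomery reduction. For each word i, mu = a[i] * mp mod 2^32 is chosen so that adding mu * m zeroes a[i]; the inner loop handles four words per pass (r12 is the byte offset j, and 0xc0 = 48 words * 4), while r3 carries the running overflow ("ca") across outer iterations. A C sketch of the whole routine (illustrative; wolfSSL's portable C implementation differs in detail):

    /* Word-serial Montgomery reduction; n = 48 for the 3072-bit case. */
    static void model_mont_reduce(sp_digit* a, const sp_digit* m,
                                  sp_digit mp, int n)
    {
        sp_digit ca = 0;                 /* carry across outer iterations (r3) */
        for (int i = 0; i < n; i++) {
            sp_digit mu = a[i] * mp;     /* truncating multiply mod 2^32 */
            uint64_t c = 0;
            for (int j = 0; j < n; j++) {
                c += (uint64_t)a[i + j] + (uint64_t)mu * m[j];
                a[i + j] = (sp_digit)c;
                c >>= 32;
            }
            c += (uint64_t)a[i + n] + ca;
            a[i + n] = (sp_digit)c;
            ca = (sp_digit)(c >> 32);
        }
    }

After the loop the reduced value sits in the upper n words, so the trailing sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp) call copies it down to the start of the buffer and subtracts m once when the final carry (returned in mp) says it is still too large; the mask (sp_digit)0 - mp is all ones exactly when that carry was set.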
+#else
+#ifndef WOLFSSL_SP_SMALL
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p)
+#else
+static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
+{
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p;
+    register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         /* i = 0 */
@@ -14233,6 +15193,101 @@ static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit
     sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp);
 }
 
+#else
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p)
+#else
+static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
+{
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p;
+    register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
+
+    __asm__ __volatile__ (
+        "LDR r11, [%[m]]\n\t"
+        /* i = 0 */
+        "MOV r9, #0x0\n\t"
+        /* ca = 0 */
+        "MOV r3, #0x0\n\t"
+        "\n"
+    "L_sp_3072_mont_reduce_48_word_%=:\n\t"
+        /* mu = a[i] * mp */
+        "LDR r10, [%[a]]\n\t"
+        "MUL r8, %[mp], r10\n\t"
+        /* j = 0 */
+        "MOV r12, #0x0\n\t"
+        "MOV r4, #0x0\n\t"
+        "\n"
+    "L_sp_3072_mont_reduce_48_mul_%=:\n\t"
+        /* a[i+j+0] += m[j+0] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "UMAAL r10, r4, r8, r7\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "UMAAL r10, r4, r8, r7\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+2] += m[j+2] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "UMAAL r10, r4, r8, r7\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+3] += m[j+3] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "UMAAL r10, r4, r8, r7\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        "CMP r12, #0xc0\n\t"
+#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
+        "BLT L_sp_3072_mont_reduce_48_mul_%=\n\t"
+#else
+        "BLT.N L_sp_3072_mont_reduce_48_mul_%=\n\t"
+#endif
+        "LDR r10, [%[a], #192]\n\t"
+        "ADDS r4, r4, r3\n\t"
+        "MOV r3, #0x0\n\t"
+        "ADC r3, r3, #0x0\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "ADC r3, r3, r3\n\t"
+        "STR r10, [%[a], #192]\n\t"
+        /* i += 1 */
+        "ADD r9, r9, #0x4\n\t"
+        "ADD %[a], %[a], #0x4\n\t"
+        "CMP r9, #0xc0\n\t"
+#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
+        "BLT L_sp_3072_mont_reduce_48_word_%=\n\t"
+#else
+        "BLT.N L_sp_3072_mont_reduce_48_word_%=\n\t"
+#endif
+        /* Loop Done */
+        "MOV %[mp], r3\n\t"
+        : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+    sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
 #endif
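[Note] The branch taken when UMAAL is available (the section just closed) folds the separate ADDS/ADC carry handling into the multiply itself: UMAAL RdLo, RdHi, Rn, Rm computes Rn*Rm + RdLo + RdHi as a 64-bit value and writes it back into RdHi:RdLo. It can never overflow, since (2^32-1)^2 + 2*(2^32-1) = 2^64 - 1. A one-line C model of the instruction:

    /* UMAAL: *hi:*lo = n * m + *lo + *hi (cannot overflow 64 bits). */
    static inline void umaal(uint32_t* lo, uint32_t* hi, uint32_t n, uint32_t m)
    {
        uint64_t t = (uint64_t)n * m + *lo + *hi;
        *lo = (uint32_t)t;
        *hi = (uint32_t)(t >> 32);
    }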
 /* Multiply two Montgomery form numbers mod the modulus (prime).
  * (r = a * b mod m)
@@ -14271,11 +15326,17 @@ SP_NOINLINE static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a,
  * a A single precision integer.
  * b A single precision digit.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p)
+#else
+static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register sp_digit b asm ("r2") = (sp_digit)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register sp_digit b __asm__ ("r2") = (sp_digit)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         /* A[0] * B */
@@ -14318,11 +15379,17 @@ static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p)
  * a A single precision integer.
  * b A single precision digit.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p)
+#else
+static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register sp_digit b asm ("r2") = (sp_digit)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register sp_digit b __asm__ ("r2") = (sp_digit)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         /* A[0] * B */
@@ -14582,11 +15649,17 @@ static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p)
  *
  * Note that this is an approximate div. It may give an answer 1 larger.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p)
+#else
+static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit d1 asm ("r0") = (sp_digit)d1_p;
-    register sp_digit d0 asm ("r1") = (sp_digit)d0_p;
-    register sp_digit div asm ("r2") = (sp_digit)div_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p;
+    register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p;
+    register sp_digit div __asm__ ("r2") = (sp_digit)div_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "LSR r8, %[div], #16\n\t"
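[Note] div_3072_word_48 estimates the quotient of the two-word value d1:d0 by div without a hardware divider; one variant works in 16-bit halves (the `LSR r8, %[div], #16`), the other bit by bit. The doc comment's caveat matters: the result can be one too large, and the callers compensate. The value being approximated is simply (sketch; assumes the true quotient fits in 32 bits):

    static sp_digit model_div_word(sp_digit d1, sp_digit d0, sp_digit div)
    {
        uint64_t d = ((uint64_t)d1 << 32) | d0;
        return (sp_digit)(d / div);  /* exact; the asm may return this + 1 */
    }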
@@ -14641,11 +15714,17 @@ static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p)
  *
  * Note that this is an approximate div. It may give an answer 1 larger.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p)
+#else
+static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit d1 asm ("r0") = (sp_digit)d1_p;
-    register sp_digit d0 asm ("r1") = (sp_digit)d0_p;
-    register sp_digit div asm ("r2") = (sp_digit)div_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p;
+    register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p;
+    register sp_digit div __asm__ ("r2") = (sp_digit)div_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "LSR r5, %[div], #1\n\t"
@@ -14705,16 +15784,22 @@ static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p)
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_int32 sp_3072_cmp_48(const sp_digit* a_p, const sp_digit* b_p)
+#else
+static sp_int32 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register const sp_digit* a asm ("r0") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r1") = (const sp_digit*)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
-        "MOV r2, #0x-1\n\t"
+        "MOV r2, #0xffffffff\n\t"
         "MOV r8, #0x1\n\t"
         "MOV r7, #0x0\n\t"
-        "MOV r3, #0x-1\n\t"
+        "MOV r3, #0xffffffff\n\t"
 #ifdef WOLFSSL_SP_SMALL
         "MOV r6, #0xbc\n\t"
         "\n"
@@ -15677,12 +16762,18 @@ static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m)
  * b A single precision number to subtract.
  * m Mask value to apply.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p)
+#else
+static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
-    register sp_digit m asm ("r3") = (sp_digit)m_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+    register sp_digit m __asm__ ("r3") = (sp_digit)m_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "MOV r8, #0x0\n\t"
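[Note] sp_3072_cmp_48 compares from the most significant word down; r6 = 0xbc is the byte offset of that word (47 * 4) where the small-code loop starts. The assembly is branch-free: the 0xffffffff constants that this patch fixes are the result accumulator and the "still equal" mask derived from the flags, so execution time does not depend on where the first differing word sits. The result contract, as a plain C model (which deliberately does not reproduce the constant-time technique):

    static sp_int32 model_cmp_n(const sp_digit* a, const sp_digit* b, int n)
    {
        for (int i = n - 1; i >= 0; i--) {
            if (a[i] > b[i]) return 1;
            if (a[i] < b[i]) return -1;
        }
        return 0;
    }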
@@ -15721,12 +16812,18 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp
  * b A single precision number to subtract.
  * m Mask value to apply.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p)
+#else
+static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
-    register sp_digit m asm ("r3") = (sp_digit)m_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+    register sp_digit m __asm__ ("r3") = (sp_digit)m_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "MOV r5, #0x0\n\t"
@@ -16076,17 +17173,24 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp
 #endif /* WOLFSSL_SP_SMALL */
 
 #ifdef WOLFSSL_SP_NO_UMAAL
+#ifndef WOLFSSL_SP_SMALL
 /* Reduce the number back to 3072 bits using Montgomery reduction.
  *
  * a A single precision number to reduce in place.
  * m The single precision number representing the modulus.
  * mp The digit representing the negative inverse of m mod 2^n.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p)
+#else
+static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* a asm ("r0") = (sp_digit*)a_p;
-    register const sp_digit* m asm ("r1") = (const sp_digit*)m_p;
-    register sp_digit mp asm ("r2") = (sp_digit)mp_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p;
+    register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "LDR lr, [%[m]]\n\t"
@@ -16893,11 +17997,125 @@ static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit
  * m The single precision number representing the modulus.
  * mp The digit representing the negative inverse of m mod 2^n.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p)
+#else
+static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* a asm ("r0") = (sp_digit*)a_p;
-    register const sp_digit* m asm ("r1") = (const sp_digit*)m_p;
-    register sp_digit mp asm ("r2") = (sp_digit)mp_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p;
+    register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
+
+    __asm__ __volatile__ (
+        "LDR r11, [%[m]]\n\t"
+        /* i = 0 */
+        "MOV r9, #0x0\n\t"
+        /* ca = 0 */
+        "MOV r3, #0x0\n\t"
+        "\n"
+    "L_sp_3072_mont_reduce_96_word_%=:\n\t"
+        /* mu = a[i] * mp */
+        "LDR r10, [%[a]]\n\t"
+        "MUL r8, %[mp], r10\n\t"
+        /* j = 0 */
+        "MOV r12, #0x0\n\t"
+        "MOV r4, #0x0\n\t"
+        "\n"
+    "L_sp_3072_mont_reduce_96_mul_%=:\n\t"
+        /* a[i+j+0] += m[j+0] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "MOV r5, #0x0\n\t"
+        "UMLAL r10, r5, r8, r7\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        "ADC r4, r5, #0x0\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "MOV r5, #0x0\n\t"
+        "UMLAL r10, r5, r8, r7\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        "ADC r4, r5, #0x0\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+2] += m[j+2] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "MOV r5, #0x0\n\t"
+        "UMLAL r10, r5, r8, r7\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        "ADC r4, r5, #0x0\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+3] += m[j+3] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "MOV r5, #0x0\n\t"
+        "UMLAL r10, r5, r8, r7\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        "ADC r4, r5, #0x0\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        "CMP r12, #0x180\n\t"
+#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
+        "BLT L_sp_3072_mont_reduce_96_mul_%=\n\t"
+#else
+        "BLT.N L_sp_3072_mont_reduce_96_mul_%=\n\t"
+#endif
+        "LDR r10, [%[a], #384]\n\t"
+        "ADDS r4, r4, r3\n\t"
+        "MOV r3, #0x0\n\t"
+        "ADC r3, r3, #0x0\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "ADC r3, r3, r3\n\t"
+        "STR r10, [%[a], #384]\n\t"
+        /* i += 1 */
+        "ADD r9, r9, #0x4\n\t"
+        "ADD %[a], %[a], #0x4\n\t"
+        "CMP r9, #0x180\n\t"
+#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
+        "BLT L_sp_3072_mont_reduce_96_word_%=\n\t"
+#else
+        "BLT.N L_sp_3072_mont_reduce_96_word_%=\n\t"
+#endif
+        /* Loop Done */
+        "MOV %[mp], r3\n\t"
+        : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+    sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#else
+#ifndef WOLFSSL_SP_SMALL
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p)
+#else
+static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
+{
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p;
+    register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         /* i = 0 */
@@ -17415,6 +18633,101 @@ static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit
     sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp);
 }
 
+#else
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p)
+#else
+static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
+{
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p;
+    register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
+
+    __asm__ __volatile__ (
+        "LDR r11, [%[m]]\n\t"
+        /* i = 0 */
+        "MOV r9, #0x0\n\t"
+        /* ca = 0 */
+        "MOV r3, #0x0\n\t"
+        "\n"
+    "L_sp_3072_mont_reduce_96_word_%=:\n\t"
+        /* mu = a[i] * mp */
+        "LDR r10, [%[a]]\n\t"
+        "MUL r8, %[mp], r10\n\t"
+        /* j = 0 */
+        "MOV r12, #0x0\n\t"
+        "MOV r4, #0x0\n\t"
+        "\n"
+    "L_sp_3072_mont_reduce_96_mul_%=:\n\t"
+        /* a[i+j+0] += m[j+0] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "UMAAL r10, r4, r8, r7\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "UMAAL r10, r4, r8, r7\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+2] += m[j+2] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "UMAAL r10, r4, r8, r7\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+3] += m[j+3] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "UMAAL r10, r4, r8, r7\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        "CMP r12, #0x180\n\t"
+#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
+        "BLT L_sp_3072_mont_reduce_96_mul_%=\n\t"
+#else
+        "BLT.N L_sp_3072_mont_reduce_96_mul_%=\n\t"
+#endif
+        "LDR r10, [%[a], #384]\n\t"
+        "ADDS r4, r4, r3\n\t"
+        "MOV r3, #0x0\n\t"
+        "ADC r3, r3, #0x0\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "ADC r3, r3, r3\n\t"
+        "STR r10, [%[a], #384]\n\t"
+        /* i += 1 */
+        "ADD r9, r9, #0x4\n\t"
+        "ADD %[a], %[a], #0x4\n\t"
+        "CMP r9, #0x180\n\t"
+#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
+        "BLT L_sp_3072_mont_reduce_96_word_%=\n\t"
+#else
+        "BLT.N L_sp_3072_mont_reduce_96_word_%=\n\t"
+#endif
+        /* Loop Done */
+        "MOV %[mp], r3\n\t"
+        : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+    sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
 #endif
form numbers mod the modulus (prime). * (r = a * b mod m) @@ -17453,11 +18766,17 @@ SP_NOINLINE static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -17494,11 +18813,17 @@ static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -17688,11 +19013,17 @@ static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digi * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -17747,11 +19078,17 @@ static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
 */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p)
+#else
+static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit d1 asm ("r0") = (sp_digit)d1_p;
-    register sp_digit d0 asm ("r1") = (sp_digit)d0_p;
-    register sp_digit div asm ("r2") = (sp_digit)div_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p;
+    register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p;
+    register sp_digit div __asm__ ("r2") = (sp_digit)div_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
        "LSR r5, %[div], #1\n\t"
@@ -17914,16 +19251,22 @@ static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m)
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_int32 sp_3072_cmp_96(const sp_digit* a_p, const sp_digit* b_p)
+#else
+static sp_int32 sp_3072_cmp_96(const sp_digit* a, const sp_digit* b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register const sp_digit* a asm ("r0") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r1") = (const sp_digit*)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
-        "MOV r2, #0x-1\n\t"
+        "MOV r2, #0xffffffff\n\t"
        "MOV r8, #0x1\n\t"
        "MOV r7, #0x0\n\t"
-        "MOV r3, #0x-1\n\t"
+        "MOV r3, #0xffffffff\n\t"
 #ifdef WOLFSSL_SP_SMALL
        "MOV r6, #0x17c\n\t"
        "\n"
@@ -19537,12 +20880,18 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em,
  * b A single precision number to add.
  * m Mask value to apply.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p)
+#else
+static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
-    register sp_digit m asm ("r3") = (sp_digit)m_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+    register sp_digit m __asm__ ("r3") = (sp_digit)m_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "MOV r5, #0x0\n\t"
@@ -19550,7 +20899,7 @@ static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp
         "MOV r4, #0x0\n\t"
         "\n"
     "L_sp_3072_cond_add_48_words_%=:\n\t"
-        "ADDS r5, r5, #0x-1\n\t"
+        "ADDS r5, r5, #0xffffffff\n\t"
         "LDR r6, [%[a], r4]\n\t"
         "LDR r7, [%[b], r4]\n\t"
         "AND r7, r7, %[m]\n\t"
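[Note] sp_3072_cond_add_48 is the additive counterpart of the cond_sub helpers above, with the same masking (`AND r7, r7, %[m]` against 0 or 0xffffffff) and the same flag-restore idiom that this patch corrects (`ADDS r5, r5, #0xffffffff` reloads the carry kept in r5 across loop iterations). In C (sketch):

    /* r = a + (b & m) over n words; m is 0 or 0xffffffff. Returns the carry. */
    static sp_digit model_cond_add_n(sp_digit* r, const sp_digit* a,
                                     const sp_digit* b, sp_digit m, int n)
    {
        uint64_t c = 0;
        for (int i = 0; i < n; i++) {
            c += (uint64_t)a[i] + (b[i] & m);
            r[i] = (sp_digit)c;
            c >>= 32;
        }
        return (sp_digit)c;
    }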
@@ -19581,12 +20930,18 @@ static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp
  * b A single precision number to add.
  * m Mask value to apply.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p)
+#else
+static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
-    register sp_digit m asm ("r3") = (sp_digit)m_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+    register sp_digit m __asm__ ("r3") = (sp_digit)m_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "MOV r10, #0x0\n\t"
@@ -20081,11 +21436,17 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod,
 #ifdef WOLFSSL_HAVE_SP_DH
 #ifdef HAVE_FFDHE_3072
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_3072_lshift_96(sp_digit* r_p, const sp_digit* a_p, byte n_p)
+#else
+static void sp_3072_lshift_96(sp_digit* r, const sp_digit* a, byte n)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register byte n asm ("r2") = (byte)n_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register byte n __asm__ ("r2") = (byte)n_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "RSB r7, %[n], #0x1f\n\t"
@@ -21086,10 +22447,16 @@ static void sp_4096_to_bin_128(sp_digit* r, byte* a)
  * a A single precision integer and result.
  * b A single precision integer.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p)
+#else
+static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* a asm ("r0") = (sp_digit*)a_p;
-    register const sp_digit* b asm ("r1") = (const sp_digit*)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "LDM %[a], {r2, r3, r4, r5}\n\t"
@@ -21330,11 +22697,17 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p)
  * a A single precision integer.
  * b A single precision integer.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p)
+#else
+static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "LDM %[a]!, {r3, r4, r5, r6}\n\t"
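[Note] sp_3072_lshift_96 above (used on the FFDHE-3072 path, where raising the generator 2 to a power is a pure shift) moves a 96-word value left by n < 32 bits. The `RSB r7, %[n], #0x1f` computes 31 - n, which the routine combines with an extra one-bit shift to extract the bits that cross each word boundary without ever needing a shift by 32. A model under those assumptions (names mine):

    /* r = a << n for 0 <= n < 32; returns the bits shifted out of the top. */
    static sp_digit model_lshift_n(sp_digit* r, const sp_digit* a,
                                   unsigned n, int words)
    {
        sp_digit carry = 0;
        for (int i = 0; i < words; i++) {
            sp_digit t = a[i];
            r[i] = (sp_digit)((t << n) | carry);
            /* (t >> 1) >> (31 - n) == t >> (32 - n), valid even for n == 0 */
            carry = (sp_digit)(t >> 1) >> (31 - n);
        }
        return carry;
    }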
@@ -21653,18 +23026,24 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
  * a A single precision integer.
  * b A single precision integer.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p)
+#else
+static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "MOV r3, #0x0\n\t"
         "ADD r12, %[a], #0x200\n\t"
         "\n"
     "L_sp_4096_add_128_word_%=:\n\t"
-        "ADDS r3, r3, #0x-1\n\t"
+        "ADDS r3, r3, #0xffffffff\n\t"
         "LDM %[a]!, {r4, r5, r6, r7}\n\t"
         "LDM %[b]!, {r8, r9, r10, r11}\n\t"
         "ADCS r4, r4, r8\n\t"
@@ -21695,10 +23074,16 @@ static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_dig
  * a A single precision integer.
  * b A single precision integer.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p)
+#else
+static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* a asm ("r0") = (sp_digit*)a_p;
-    register const sp_digit* b asm ("r1") = (const sp_digit*)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "MOV r10, #0x0\n\t"
@@ -21736,11 +23121,17 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p)
  * a A single precision integer.
  * b A single precision integer.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_4096_mul_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p)
+#else
+static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "SUB sp, sp, #0x400\n\t"
@@ -21811,10 +23202,16 @@ static void sp_4096_mul_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit*
  * r A single precision integer.
  * a A single precision integer.
 */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_4096_sqr_128(sp_digit* r_p, const sp_digit* a_p)
+#else
+static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "SUB sp, sp, #0x400\n\t"
@@ -21933,11 +23330,17 @@ static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
  * a A single precision integer.
  * b A single precision digit.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p)
+#else
+static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register sp_digit b asm ("r2") = (sp_digit)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register sp_digit b __asm__ ("r2") = (sp_digit)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         /* A[0] * B */
@@ -21980,11 +23383,17 @@ static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p)
  * a A single precision integer.
  * b A single precision digit.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p)
+#else
+static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register sp_digit b asm ("r2") = (sp_digit)b_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register sp_digit b __asm__ ("r2") = (sp_digit)b_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         /* A[0] * B */
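[Note] For context on the `rho`/`mp` value that sp_4096_mont_setup (referenced in the hunk above) feeds into the reductions: per the doc comments it is the negative inverse of the modulus's low word mod 2^32, which exists because the modulus is odd. A standard way to compute it, shown as a sketch rather than a claim about the generated routine's exact sequence, is Hensel/Newton lifting:

    static sp_digit model_mont_rho(sp_digit m0)   /* m0 = m[0], must be odd */
    {
        sp_digit x = m0;           /* inverse of m0 mod 2^3 for odd m0 */
        for (int i = 0; i < 4; i++)
            x *= 2 - m0 * x;       /* each step doubles the correct low bits */
        return (sp_digit)0 - x;    /* rho = -1/m0 mod 2^32 */
    }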
@@ -22659,12 +24068,18 @@ static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m)
  * b A single precision number to subtract.
  * m Mask value to apply.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p)
+#else
+static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
-    register sp_digit m asm ("r3") = (sp_digit)m_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+    register sp_digit m __asm__ ("r3") = (sp_digit)m_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "MOV r8, #0x0\n\t"
@@ -22703,12 +24118,18 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const s
  * b A single precision number to subtract.
  * m Mask value to apply.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p)
+#else
+static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* r asm ("r0") = (sp_digit*)r_p;
-    register const sp_digit* a asm ("r1") = (const sp_digit*)a_p;
-    register const sp_digit* b asm ("r2") = (const sp_digit*)b_p;
-    register sp_digit m asm ("r3") = (sp_digit)m_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p;
+    register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p;
+    register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p;
+    register sp_digit m __asm__ ("r3") = (sp_digit)m_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "MOV r5, #0x0\n\t"
@@ -23170,17 +24591,24 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const s
 #endif /* WOLFSSL_SP_SMALL */
 
 #ifdef WOLFSSL_SP_NO_UMAAL
+#ifndef WOLFSSL_SP_SMALL
 /* Reduce the number back to 4096 bits using Montgomery reduction.
  *
  * a A single precision number to reduce in place.
  * m The single precision number representing the modulus.
  * mp The digit representing the negative inverse of m mod 2^n.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p)
+#else
+static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* a asm ("r0") = (sp_digit*)a_p;
-    register const sp_digit* m asm ("r1") = (const sp_digit*)m_p;
-    register sp_digit mp asm ("r2") = (sp_digit)mp_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p;
+    register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         "LDR lr, [%[m]]\n\t"
@@ -24243,11 +25671,125 @@ static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit
  * m The single precision number representing the modulus.
  * mp The digit representing the negative inverse of m mod 2^n.
  */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
 static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p)
+#else
+static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 {
-    register sp_digit* a asm ("r0") = (sp_digit*)a_p;
-    register const sp_digit* m asm ("r1") = (const sp_digit*)m_p;
-    register sp_digit mp asm ("r2") = (sp_digit)mp_p;
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p;
+    register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
+
+    __asm__ __volatile__ (
+        "LDR r11, [%[m]]\n\t"
+        /* i = 0 */
+        "MOV r9, #0x0\n\t"
+        /* ca = 0 */
+        "MOV r3, #0x0\n\t"
+        "\n"
+    "L_sp_4096_mont_reduce_128_word_%=:\n\t"
+        /* mu = a[i] * mp */
+        "LDR r10, [%[a]]\n\t"
+        "MUL r8, %[mp], r10\n\t"
+        /* j = 0 */
+        "MOV r12, #0x0\n\t"
+        "MOV r4, #0x0\n\t"
+        "\n"
+    "L_sp_4096_mont_reduce_128_mul_%=:\n\t"
+        /* a[i+j+0] += m[j+0] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "MOV r5, #0x0\n\t"
+        "UMLAL r10, r5, r8, r7\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        "ADC r4, r5, #0x0\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "MOV r5, #0x0\n\t"
+        "UMLAL r10, r5, r8, r7\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        "ADC r4, r5, #0x0\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+2] += m[j+2] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "MOV r5, #0x0\n\t"
+        "UMLAL r10, r5, r8, r7\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        "ADC r4, r5, #0x0\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+3] += m[j+3] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "MOV r5, #0x0\n\t"
+        "UMLAL r10, r5, r8, r7\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        "ADC r4, r5, #0x0\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        "CMP r12, #0x200\n\t"
+#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
+        "BLT L_sp_4096_mont_reduce_128_mul_%=\n\t"
+#else
+        "BLT.N L_sp_4096_mont_reduce_128_mul_%=\n\t"
+#endif
+        "LDR r10, [%[a], #512]\n\t"
+        "ADDS r4, r4, r3\n\t"
+        "MOV r3, #0x0\n\t"
+        "ADC r3, r3, #0x0\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "ADC r3, r3, r3\n\t"
+        "STR r10, [%[a], #512]\n\t"
+        /* i += 1 */
+        "ADD r9, r9, #0x4\n\t"
+        "ADD %[a], %[a], #0x4\n\t"
+        "CMP r9, #0x200\n\t"
+#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
+        "BLT L_sp_4096_mont_reduce_128_word_%=\n\t"
+#else
+        "BLT.N L_sp_4096_mont_reduce_128_word_%=\n\t"
+#endif
+        /* Loop Done */
+        "MOV %[mp], r3\n\t"
+        : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+    sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#else
+#ifndef WOLFSSL_SP_SMALL
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p)
+#else
+static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
+{
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p;
+    register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
 
     __asm__ __volatile__ (
         /* i = 0 */
@@ -24925,6 +26467,101 @@ static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit
     sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp);
 }
 
+#else
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p)
+#else
+static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp)
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
+{
+#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+    register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p;
+    register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p;
+    register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p;
+#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
+
+    __asm__ __volatile__ (
+        "LDR r11, [%[m]]\n\t"
+        /* i = 0 */
+        "MOV r9, #0x0\n\t"
+        /* ca = 0 */
+        "MOV r3, #0x0\n\t"
+        "\n"
+    "L_sp_4096_mont_reduce_128_word_%=:\n\t"
+        /* mu = a[i] * mp */
+        "LDR r10, [%[a]]\n\t"
+        "MUL r8, %[mp], r10\n\t"
+        /* j = 0 */
+        "MOV r12, #0x0\n\t"
+        "MOV r4, #0x0\n\t"
+        "\n"
+    "L_sp_4096_mont_reduce_128_mul_%=:\n\t"
+        /* a[i+j+0] += m[j+0] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "UMAAL r10, r4, r8, r7\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "UMAAL r10, r4, r8, r7\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+2] += m[j+2] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "UMAAL r10, r4, r8, r7\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        /* a[i+j+3] += m[j+3] * mu */
+        "LDR r7, [%[m], r12]\n\t"
+        "LDR r10, [%[a], r12]\n\t"
+        "UMAAL r10, r4, r8, r7\n\t"
+        "STR r10, [%[a], r12]\n\t"
+        /* j += 1 */
+        "ADD r12, r12, #0x4\n\t"
+        "CMP r12, #0x200\n\t"
+#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
+        "BLT L_sp_4096_mont_reduce_128_mul_%=\n\t"
+#else
+        "BLT.N L_sp_4096_mont_reduce_128_mul_%=\n\t"
+#endif
+        "LDR r10, [%[a], #512]\n\t"
+        "ADDS r4, r4, r3\n\t"
+        "MOV r3, #0x0\n\t"
+        "ADC r3, r3, #0x0\n\t"
+        "ADDS r10, r10, r4\n\t"
+        "ADC r3, r3, r3\n\t"
+        "STR r10, [%[a], #512]\n\t"
+        /* i += 1 */
+        "ADD r9, r9, #0x4\n\t"
+        "ADD %[a], %[a], #0x4\n\t"
+        "CMP r9, #0x200\n\t"
+#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
+        "BLT L_sp_4096_mont_reduce_128_word_%=\n\t"
+#else
+        "BLT.N L_sp_4096_mont_reduce_128_word_%=\n\t"
+#endif
+        /* Loop Done */
+        "MOV %[mp], r3\n\t"
+        : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+    sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
 #endif
two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) @@ -24963,11 +26600,17 @@ SP_NOINLINE static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -25004,11 +26647,17 @@ static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_dig * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -25254,11 +26903,17 @@ static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_dig * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -25313,11 +26968,17 @@ static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
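Both div_4096_word_128 variants (the 16-bit-estimate version above and the shift-and-subtract version below) compute a 64-by-32-bit quotient without a hardware divide instruction. As a reference only, this is the exact value they approximate, assuming d1 < div so the quotient fits in one limb; per the comment, the assembly may return this value or one more, and the surrounding division code corrects for that:

#include <stdint.h>

typedef uint32_t sp_digit;

/* Reference model: floor((d1*2^32 + d0) / div), assuming d1 < div.
 * Not what the assembly does -- it avoids the 64-bit divide and is
 * allowed to be one too large. */
static sp_digit div_word_model(sp_digit d1, sp_digit d0, sp_digit div)
{
    uint64_t d = ((uint64_t)d1 << 32) | d0;
    return (sp_digit)(d / div);
}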
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -25480,16 +27141,22 @@ static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_4096_cmp_128(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x1fc\n\t" "\n" @@ -27455,12 +29122,18 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -27468,7 +29141,7 @@ static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp "MOV r4, #0x0\n\t" "\n" "L_sp_4096_cond_add_64_words_%=:\n\t" - "ADDS r5, r5, #0x-1\n\t" + "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" "AND r7, r7, %[m]\n\t" @@ -27499,12 +29172,18 @@ static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -28055,11 +29734,17 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_4096 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_lshift_128(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_4096_lshift_128(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x1f\n\t" @@ -29126,11 +30811,17 @@ static const sp_digit p256_b[8] = { * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x40\n\t" @@ -29198,17 +30889,26 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p #else #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. 
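The sp_256_mul_8 implementations that follow are fully unrolled schoolbook multiplication; the variants differ only in register budget and in whether UMAAL is available, and the UMAAL variant also stops reusing the %[r]/%[a]/%[b] operands as scratch and names r0-r2 directly. The looped C equivalent of the schoolbook product, assuming 32-bit limbs and an illustrative helper name:

#include <stdint.h>

typedef uint32_t sp_digit;

/* Schoolbook multiply: r (2n limbs) = a (n limbs) * b (n limbs).
 * Each UMAAL in the unrolled code is one t = a[i]*b[j] + acc + carry
 * step; a product plus two 32-bit addends cannot overflow 64 bits. */
static void mul_model(sp_digit* r, const sp_digit* a,
                      const sp_digit* b, int n)
{
    for (int k = 0; k < 2 * n; k++)
        r[k] = 0;
    for (int i = 0; i < n; i++) {
        uint64_t c = 0;
        for (int j = 0; j < n; j++) {
            uint64_t t = (uint64_t)a[i] * b[j] + r[i + j] + c;
            r[i + j] = (sp_digit)t;
            c = t >> 32;
        }
        r[i + n] = (sp_digit)c;
    }
}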
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x24\n\t" @@ -29551,115 +31251,129 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "STRD %[r], %[a], [sp, #36]\n\t" +#else + "STR %[r], [sp, #36]\n\t" + "STR %[a], [sp, #40]\n\t" +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[b]\n\t" - "LDM %[a], {%[r], %[a], %[b], r3}\n\t" + "LDM %[a], {r0, r1, r2, r3}\n\t" "LDM lr!, {r4, r5, r6}\n\t" - "UMULL r10, r11, %[r], r4\n\t" - "UMULL r12, r7, %[a], r4\n\t" - "UMAAL r11, r12, %[r], r5\n\t" - "UMULL r8, r9, %[b], r4\n\t" - "UMAAL r12, r8, %[a], r5\n\t" - "UMAAL r12, r7, %[r], r6\n\t" + "UMULL r10, r11, r0, r4\n\t" + "UMULL r12, r7, r1, r4\n\t" + "UMAAL r11, r12, r0, r5\n\t" + "UMULL r8, r9, r2, r4\n\t" + "UMAAL r12, r8, r1, r5\n\t" + "UMAAL r12, r7, r0, r6\n\t" "UMAAL r8, r9, r3, r4\n\t" "STM sp, {r10, r11, r12}\n\t" - "UMAAL r7, r8, %[b], r5\n\t" + "UMAAL r7, r8, r2, r5\n\t" "LDM lr!, {r4}\n\t" - "UMULL r10, r11, %[a], r6\n\t" - "UMAAL r8, r9, %[b], r6\n\t" - "UMAAL r7, r10, %[r], r4\n\t" + "UMULL r10, r11, r1, r6\n\t" + "UMAAL r8, r9, r2, r6\n\t" + "UMAAL r7, r10, r0, r4\n\t" "UMAAL r8, r11, r3, r5\n\t" "STR r7, [sp, #12]\n\t" - "UMAAL r8, r10, %[a], r4\n\t" + "UMAAL r8, r10, r1, r4\n\t" "UMAAL r9, r11, r3, r6\n\t" - "UMAAL r9, r10, %[b], r4\n\t" + "UMAAL r9, r10, r2, r4\n\t" "UMAAL r10, r11, r3, r4\n\t" "LDM lr, {r4, r5, r6, r7}\n\t" "MOV r12, #0x0\n\t" - "UMLAL r8, r12, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r4\n\t" - "UMAAL r10, r12, %[b], r4\n\t" + "UMLAL r8, r12, r0, r4\n\t" + "UMAAL r9, r12, r1, r4\n\t" + "UMAAL r10, r12, r2, r4\n\t" "UMAAL r11, r12, r3, r4\n\t" "MOV r4, #0x0\n\t" - "UMLAL r9, r4, %[r], r5\n\t" - "UMAAL r10, r4, %[a], r5\n\t" - "UMAAL r11, r4, %[b], r5\n\t" + "UMLAL r9, r4, r0, r5\n\t" + "UMAAL r10, r4, r1, r5\n\t" + "UMAAL r11, r4, r2, r5\n\t" "UMAAL r12, r4, r3, r5\n\t" "MOV r5, 
#0x0\n\t" - "UMLAL r10, r5, %[r], r6\n\t" - "UMAAL r11, r5, %[a], r6\n\t" - "UMAAL r12, r5, %[b], r6\n\t" + "UMLAL r10, r5, r0, r6\n\t" + "UMAAL r11, r5, r1, r6\n\t" + "UMAAL r12, r5, r2, r6\n\t" "UMAAL r4, r5, r3, r6\n\t" "MOV r6, #0x0\n\t" - "UMLAL r11, r6, %[r], r7\n\t" - "LDR %[r], [sp, #40]\n\t" - "UMAAL r12, r6, %[a], r7\n\t" - "ADD %[r], %[r], #0x10\n\t" - "UMAAL r4, r6, %[b], r7\n\t" + "UMLAL r11, r6, r0, r7\n\t" + "LDR r0, [sp, #40]\n\t" + "UMAAL r12, r6, r1, r7\n\t" + "ADD r0, r0, #0x10\n\t" + "UMAAL r4, r6, r2, r7\n\t" "SUB lr, lr, #0x10\n\t" "UMAAL r5, r6, r3, r7\n\t" - "LDM %[r], {%[r], %[a], %[b], r3}\n\t" + "LDM r0, {r0, r1, r2, r3}\n\t" "STR r6, [sp, #32]\n\t" "LDM lr!, {r6}\n\t" "MOV r7, #0x0\n\t" - "UMLAL r8, r7, %[r], r6\n\t" - "UMAAL r9, r7, %[a], r6\n\t" + "UMLAL r8, r7, r0, r6\n\t" + "UMAAL r9, r7, r1, r6\n\t" "STR r8, [sp, #16]\n\t" - "UMAAL r10, r7, %[b], r6\n\t" + "UMAAL r10, r7, r2, r6\n\t" "UMAAL r11, r7, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r8, #0x0\n\t" - "UMLAL r9, r8, %[r], r6\n\t" - "UMAAL r10, r8, %[a], r6\n\t" + "UMLAL r9, r8, r0, r6\n\t" + "UMAAL r10, r8, r1, r6\n\t" "STR r9, [sp, #20]\n\t" - "UMAAL r11, r8, %[b], r6\n\t" + "UMAAL r11, r8, r2, r6\n\t" "UMAAL r12, r8, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r9, #0x0\n\t" - "UMLAL r10, r9, %[r], r6\n\t" - "UMAAL r11, r9, %[a], r6\n\t" + "UMLAL r10, r9, r0, r6\n\t" + "UMAAL r11, r9, r1, r6\n\t" "STR r10, [sp, #24]\n\t" - "UMAAL r12, r9, %[b], r6\n\t" + "UMAAL r12, r9, r2, r6\n\t" "UMAAL r4, r9, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r10, #0x0\n\t" - "UMLAL r11, r10, %[r], r6\n\t" - "UMAAL r12, r10, %[a], r6\n\t" + "UMLAL r11, r10, r0, r6\n\t" + "UMAAL r12, r10, r1, r6\n\t" "STR r11, [sp, #28]\n\t" - "UMAAL r4, r10, %[b], r6\n\t" + "UMAAL r4, r10, r2, r6\n\t" "UMAAL r5, r10, r3, r6\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r12, r7, %[r], r11\n\t" - "UMAAL r4, r7, %[a], r11\n\t" + "UMAAL r12, r7, r0, r11\n\t" + "UMAAL r4, r7, r1, r11\n\t" "LDR r6, [sp, #32]\n\t" - "UMAAL r5, r7, %[b], r11\n\t" + "UMAAL r5, r7, r2, r11\n\t" "UMAAL r6, r7, r3, r11\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r4, r8, %[r], r11\n\t" - "UMAAL r5, r8, %[a], r11\n\t" - "UMAAL r6, r8, %[b], r11\n\t" + "UMAAL r4, r8, r0, r11\n\t" + "UMAAL r5, r8, r1, r11\n\t" + "UMAAL r6, r8, r2, r11\n\t" "UMAAL r7, r8, r3, r11\n\t" "LDM lr, {r11, lr}\n\t" - "UMAAL r5, r9, %[r], r11\n\t" - "UMAAL r6, r10, %[r], lr\n\t" - "UMAAL r6, r9, %[a], r11\n\t" - "UMAAL r7, r10, %[a], lr\n\t" - "UMAAL r7, r9, %[b], r11\n\t" - "UMAAL r8, r10, %[b], lr\n\t" + "UMAAL r5, r9, r0, r11\n\t" + "UMAAL r6, r10, r0, lr\n\t" + "UMAAL r6, r9, r1, r11\n\t" + "UMAAL r7, r10, r1, lr\n\t" + "UMAAL r7, r9, r2, r11\n\t" + "UMAAL r8, r10, r2, lr\n\t" "UMAAL r8, r9, r3, r11\n\t" "UMAAL r9, r10, r3, lr\n\t" "MOV r3, r12\n\t" @@ -29684,10 +31398,16 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p * r A single precision integer. * a A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x40\n\t" @@ -29780,15 +31500,24 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) #else #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -30023,62 +31752,71 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x20\n\t" "STR %[r], [sp, #28]\n\t" - "LDM %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" - "UMULL r9, r10, %[r], %[r]\n\t" - "UMULL r11, r12, %[r], %[a]\n\t" + "LDM %[a], {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" + "UMULL r9, r10, r0, r0\n\t" + "UMULL r11, r12, r0, r1\n\t" "ADDS r11, r11, r11\n\t" "MOV lr, #0x0\n\t" "UMAAL r10, r11, lr, lr\n\t" "STM sp, {r9, r10}\n\t" "MOV r8, lr\n\t" - "UMAAL r8, r12, %[r], r2\n\t" + "UMAAL r8, r12, r0, r2\n\t" "ADCS r8, r8, r8\n\t" - "UMAAL r8, r11, %[a], %[a]\n\t" - "UMULL r9, r10, %[r], r3\n\t" - "UMAAL r9, r12, %[a], r2\n\t" + "UMAAL r8, r11, r1, r1\n\t" + "UMULL r9, r10, r0, r3\n\t" + "UMAAL r9, r12, r1, r2\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STRD r8, r9, [sp, #8]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r10, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r3\n\t" + "UMAAL r9, r10, r0, r4\n\t" + "UMAAL r9, r12, r1, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r2, r2\n\t" "STR r9, [sp, #16]\n\t" - "UMULL r9, r8, %[r], r5\n\t" - "UMAAL r9, r12, %[a], r4\n\t" + "UMULL r9, r8, r0, r5\n\t" + "UMAAL r9, r12, r1, r4\n\t" "UMAAL r9, r10, r2, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STR r9, [sp, #20]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r8, %[r], r6\n\t" - "UMAAL r9, r12, %[a], 
r5\n\t" + "UMAAL r9, r8, r0, r6\n\t" + "UMAAL r9, r12, r1, r5\n\t" "UMAAL r9, r10, r2, r4\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r3, r3\n\t" "STR r9, [sp, #24]\n\t" - "UMULL %[r], r9, %[r], r7\n\t" - "UMAAL %[r], r8, %[a], r6\n\t" - "UMAAL %[r], r12, r2, r5\n\t" - "UMAAL %[r], r10, r3, r4\n\t" - "ADCS %[r], %[r], %[r]\n\t" - "UMAAL %[r], r11, lr, lr\n\t" + "UMULL r0, r9, r0, r7\n\t" + "UMAAL r0, r8, r1, r6\n\t" + "UMAAL r0, r12, r2, r5\n\t" + "UMAAL r0, r10, r3, r4\n\t" + "ADCS r0, r0, r0\n\t" + "UMAAL r0, r11, lr, lr\n\t" /* R[7] = r0 */ - "UMAAL r9, r8, %[a], r7\n\t" + "UMAAL r9, r8, r1, r7\n\t" "UMAAL r9, r10, r2, r6\n\t" "UMAAL r12, r9, r3, r5\n\t" "ADCS r12, r12, r12\n\t" @@ -30096,10 +31834,10 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) "ADCS r3, r2, r2\n\t" "UMAAL r10, r3, r5, r5\n\t" /* R[10] = r10 */ - "MOV %[a], lr\n\t" - "UMAAL %[a], r8, r4, r7\n\t" - "UMAAL %[a], r9, r5, r6\n\t" - "ADCS r4, %[a], %[a]\n\t" + "MOV r1, lr\n\t" + "UMAAL r1, r8, r4, r7\n\t" + "UMAAL r1, r9, r5, r6\n\t" + "ADCS r4, r1, r1\n\t" "UMAAL r3, r4, lr, lr\n\t" /* R[11] = r3 */ "UMAAL r8, r9, r5, r7\n\t" @@ -30118,14 +31856,14 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) /* R[15] = r7 */ "LDR lr, [sp, #28]\n\t" "ADD lr, lr, #0x1c\n\t" - "STM lr!, {%[r], r12}\n\t" + "STM lr!, {r0, r12}\n\t" "STM lr!, {r11}\n\t" "STM lr!, {r10}\n\t" "STM lr!, {r3, r4, r8, r9}\n\t" "STM lr!, {r7}\n\t" "SUB lr, lr, #0x40\n\t" - "LDM sp, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" - "STM lr, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" + "LDM sp, {r0, r1, r2, r3, r4, r5, r6}\n\t" + "STM lr, {r0, r1, r2, r3, r4, r5, r6}\n\t" "ADD sp, sp, #0x20\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -30142,18 +31880,24 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x20\n\t" "\n" "L_sp_256_add_8_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -30184,11 +31928,17 @@ static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -30221,10 +31971,16 @@ static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* * a The number to convert. * m The modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x18\n\t" @@ -30232,7 +31988,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di /* Clear overflow and underflow */ "MOV r11, #0x0\n\t" "MOV r12, #0x0\n\t" - "# t[0] = 1 1 0 -1 -1 -1 -1 0\n\t" + /* t[0] = 1 1 0 -1 -1 -1 -1 0 */ "ADDS r10, r2, r3\n\t" "ADC r11, r11, #0x0\n\t" "SUBS r10, r10, r5\n\t" @@ -30247,7 +32003,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r10, [sp]\n\t" "neg r12, r12\n\t" "MOV r10, #0x0\n\t" - "# t[1] = 0 1 1 0 -1 -1 -1 -1\n\t" + /* t[1] = 0 1 1 0 -1 -1 -1 -1 */ "ADDS r11, r11, r3\n\t" "ADC r10, r10, #0x0\n\t" "ADDS r11, r11, r4\n\t" @@ -30266,7 +32022,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r11, [sp, #4]\n\t" "neg r12, r12\n\t" "MOV r11, #0x0\n\t" - "# t[2] = 0 0 1 1 0 -1 -1 -1\n\t" + /* t[2] = 0 0 1 1 0 -1 -1 -1 */ "ADDS r10, r10, r4\n\t" "ADC r11, r11, #0x0\n\t" "ADDS r10, r10, r5\n\t" @@ -30283,7 +32039,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r10, [sp, #8]\n\t" "neg r12, r12\n\t" "MOV r10, #0x0\n\t" - "# t[3] = -1 -1 0 2 2 1 0 -1\n\t" + /* t[3] = -1 -1 0 2 2 1 0 -1 */ "ADDS r11, r11, r5\n\t" "ADC r10, r10, #0x0\n\t" "ADDS r11, r11, r5\n\t" @@ -30306,7 +32062,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r11, [sp, #12]\n\t" "neg r12, r12\n\t" "MOV r11, #0x0\n\t" - "# t[4] = 0 -1 -1 0 2 2 1 0\n\t" + /* t[4] = 0 -1 -1 0 2 2 1 0 */ "ADDS r10, r10, r6\n\t" "ADC r11, r11, #0x0\n\t" "ADDS r10, r10, r6\n\t" @@ -30327,7 +32083,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r10, [sp, #16]\n\t" "neg r12, r12\n\t" "MOV r10, #0x0\n\t" - "# t[5] = 0 0 -1 -1 0 2 2 1\n\t" + /* t[5] = 0 0 -1 -1 0 2 2 1 */ "ADDS r11, r11, r7\n\t" "ADC r10, r10, #0x0\n\t" "ADDS r11, r11, r7\n\t" @@ -30348,7 +32104,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r11, [sp, #20]\n\t" "neg 
r12, r12\n\t" "MOV r11, #0x0\n\t" - "# t[6] = -1 -1 0 0 0 1 3 2\n\t" + /* t[6] = -1 -1 0 0 0 1 3 2 */ "ADDS r10, r10, r7\n\t" "ADC r11, r11, #0x0\n\t" "ADDS r10, r10, r8\n\t" @@ -30371,7 +32127,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "MOV r8, r10\n\t" "neg r12, r12\n\t" "MOV r10, #0x0\n\t" - "# t[7] = 1 0 -1 -1 -1 -1 0 3\n\t" + /* t[7] = 1 0 -1 -1 -1 -1 0 3 */ "ADDS r11, r11, r2\n\t" "ADC r10, r10, #0x0\n\t" "ADDS r11, r11, r9\n\t" @@ -30447,7 +32203,11 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ return (uint32_t)(size_t)r; } @@ -30647,6 +32407,9 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) } #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -30656,11 +32419,17 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) * m Modulus (prime). * mp Montgomery multiplier. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -31119,11 +32888,22 @@ static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -31133,109 +32913,120 @@ static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * m Modulus (prime). * mp Montgomery multiplier. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x4c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "STRD %[r], %[a], [sp, #68]\n\t" +#else + "STR %[r], [sp, #68]\n\t" + "STR %[a], [sp, #72]\n\t" +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[b]\n\t" - "LDM %[a], {%[r], %[a], %[b], r3}\n\t" + "LDM %[a], {r0, r1, r2, r3}\n\t" "LDM lr!, {r4, r5, r6}\n\t" - "UMULL r10, r11, %[r], r4\n\t" - "UMULL r12, r7, %[a], r4\n\t" - "UMAAL r11, r12, %[r], r5\n\t" - "UMULL r8, r9, %[b], r4\n\t" - "UMAAL r12, r8, %[a], r5\n\t" - "UMAAL r12, r7, %[r], r6\n\t" + "UMULL r10, r11, r0, r4\n\t" + "UMULL r12, r7, r1, r4\n\t" + "UMAAL r11, r12, r0, r5\n\t" + "UMULL r8, r9, r2, r4\n\t" + "UMAAL r12, r8, r1, r5\n\t" + "UMAAL r12, r7, r0, r6\n\t" "UMAAL r8, r9, r3, r4\n\t" "STM sp, {r10, r11, r12}\n\t" - "UMAAL r7, r8, %[b], r5\n\t" + "UMAAL r7, r8, r2, r5\n\t" "LDM lr!, {r4}\n\t" - "UMULL r10, r11, %[a], r6\n\t" - "UMAAL r8, r9, %[b], r6\n\t" - "UMAAL r7, r10, %[r], r4\n\t" + "UMULL r10, r11, r1, r6\n\t" + "UMAAL r8, r9, r2, r6\n\t" + "UMAAL r7, r10, r0, r4\n\t" "UMAAL r8, r11, r3, r5\n\t" "STR r7, [sp, #12]\n\t" - "UMAAL r8, r10, %[a], r4\n\t" + "UMAAL r8, r10, r1, r4\n\t" "UMAAL r9, r11, r3, r6\n\t" - "UMAAL r9, r10, %[b], r4\n\t" + "UMAAL r9, r10, r2, r4\n\t" "UMAAL r10, r11, r3, r4\n\t" "LDM lr, {r4, r5, r6, r7}\n\t" "MOV r12, #0x0\n\t" - "UMLAL r8, r12, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r4\n\t" - "UMAAL r10, r12, %[b], r4\n\t" + "UMLAL r8, r12, r0, r4\n\t" + "UMAAL r9, r12, r1, r4\n\t" + "UMAAL r10, r12, r2, r4\n\t" "UMAAL r11, r12, r3, r4\n\t" "MOV r4, #0x0\n\t" - "UMLAL r9, r4, %[r], r5\n\t" - "UMAAL r10, r4, %[a], r5\n\t" - "UMAAL r11, r4, %[b], r5\n\t" + "UMLAL r9, r4, r0, r5\n\t" + "UMAAL r10, r4, r1, r5\n\t" + "UMAAL r11, r4, r2, r5\n\t" "UMAAL r12, r4, r3, r5\n\t" "MOV r5, #0x0\n\t" - "UMLAL r10, r5, %[r], r6\n\t" - "UMAAL r11, r5, %[a], r6\n\t" - "UMAAL r12, r5, %[b], r6\n\t" + "UMLAL r10, r5, r0, r6\n\t" + "UMAAL r11, r5, r1, r6\n\t" + "UMAAL r12, r5, r2, r6\n\t" "UMAAL r4, r5, r3, r6\n\t" "MOV r6, #0x0\n\t" - "UMLAL r11, r6, %[r], r7\n\t" - "LDR %[r], [sp, #72]\n\t" - "UMAAL r12, r6, %[a], r7\n\t" - "ADD %[r], %[r], #0x10\n\t" - "UMAAL r4, r6, %[b], r7\n\t" + "UMLAL r11, r6, r0, r7\n\t" + "LDR r0, [sp, #72]\n\t" + "UMAAL r12, r6, r1, r7\n\t" + "ADD r0, r0, #0x10\n\t" + "UMAAL r4, r6, r2, r7\n\t" "SUB lr, lr, #0x10\n\t" "UMAAL r5, r6, r3, r7\n\t" - "LDM %[r], {%[r], %[a], %[b], r3}\n\t" + "LDM r0, {r0, r1, r2, r3}\n\t" "STR r6, [sp, #64]\n\t" "LDM lr!, {r6}\n\t" "MOV r7, #0x0\n\t" - "UMLAL r8, r7, %[r], r6\n\t" - "UMAAL r9, r7, %[a], r6\n\t" + "UMLAL r8, r7, r0, r6\n\t" + "UMAAL r9, r7, r1, r6\n\t" "STR r8, [sp, #16]\n\t" - "UMAAL r10, r7, %[b], r6\n\t" + "UMAAL r10, r7, r2, r6\n\t" "UMAAL r11, r7, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r8, #0x0\n\t" - 
"UMLAL r9, r8, %[r], r6\n\t" - "UMAAL r10, r8, %[a], r6\n\t" + "UMLAL r9, r8, r0, r6\n\t" + "UMAAL r10, r8, r1, r6\n\t" "STR r9, [sp, #20]\n\t" - "UMAAL r11, r8, %[b], r6\n\t" + "UMAAL r11, r8, r2, r6\n\t" "UMAAL r12, r8, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r9, #0x0\n\t" - "UMLAL r10, r9, %[r], r6\n\t" - "UMAAL r11, r9, %[a], r6\n\t" + "UMLAL r10, r9, r0, r6\n\t" + "UMAAL r11, r9, r1, r6\n\t" "STR r10, [sp, #24]\n\t" - "UMAAL r12, r9, %[b], r6\n\t" + "UMAAL r12, r9, r2, r6\n\t" "UMAAL r4, r9, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r10, #0x0\n\t" - "UMLAL r11, r10, %[r], r6\n\t" - "UMAAL r12, r10, %[a], r6\n\t" + "UMLAL r11, r10, r0, r6\n\t" + "UMAAL r12, r10, r1, r6\n\t" "STR r11, [sp, #28]\n\t" - "UMAAL r4, r10, %[b], r6\n\t" + "UMAAL r4, r10, r2, r6\n\t" "UMAAL r5, r10, r3, r6\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r12, r7, %[r], r11\n\t" - "UMAAL r4, r7, %[a], r11\n\t" + "UMAAL r12, r7, r0, r11\n\t" + "UMAAL r4, r7, r1, r11\n\t" "LDR r6, [sp, #64]\n\t" - "UMAAL r5, r7, %[b], r11\n\t" + "UMAAL r5, r7, r2, r11\n\t" "UMAAL r6, r7, r3, r11\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r4, r8, %[r], r11\n\t" - "UMAAL r5, r8, %[a], r11\n\t" - "UMAAL r6, r8, %[b], r11\n\t" + "UMAAL r4, r8, r0, r11\n\t" + "UMAAL r5, r8, r1, r11\n\t" + "UMAAL r6, r8, r2, r11\n\t" "UMAAL r7, r8, r3, r11\n\t" "LDM lr, {r11, lr}\n\t" - "UMAAL r5, r9, %[r], r11\n\t" - "UMAAL r6, r10, %[r], lr\n\t" - "UMAAL r6, r9, %[a], r11\n\t" - "UMAAL r7, r10, %[a], lr\n\t" - "UMAAL r7, r9, %[b], r11\n\t" - "UMAAL r8, r10, %[b], lr\n\t" + "UMAAL r5, r9, r0, r11\n\t" + "UMAAL r6, r10, r0, lr\n\t" + "UMAAL r6, r9, r1, r11\n\t" + "UMAAL r7, r10, r1, lr\n\t" + "UMAAL r7, r9, r2, r11\n\t" + "UMAAL r8, r10, r2, lr\n\t" "UMAAL r8, r9, r3, r11\n\t" "UMAAL r9, r10, r3, lr\n\t" "MOV r3, r12\n\t" @@ -31369,12 +33160,23 @@ static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", "r8", "r9", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #endif #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) * * r Result of squaring. @@ -31382,10 +33184,16 @@ static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * m Modulus (prime). * mp Montgomery multiplier. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -31736,11 +33544,22 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) * * r Result of squaring. @@ -31748,57 +33567,63 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * m Modulus (prime). * mp Montgomery multiplier. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" "STR %[r], [sp, #64]\n\t" - "LDM %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" - "UMULL r9, r10, %[r], %[r]\n\t" - "UMULL r11, r12, %[r], %[a]\n\t" + "LDM %[a], {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" + "UMULL r9, r10, r0, r0\n\t" + "UMULL r11, r12, r0, r1\n\t" "ADDS r11, r11, r11\n\t" "MOV lr, #0x0\n\t" "UMAAL r10, r11, lr, lr\n\t" "STM sp, {r9, r10}\n\t" "MOV r8, lr\n\t" - "UMAAL r8, r12, %[r], r2\n\t" + "UMAAL r8, r12, r0, r2\n\t" "ADCS r8, r8, r8\n\t" - "UMAAL r8, r11, %[a], %[a]\n\t" - "UMULL r9, r10, %[r], r3\n\t" - "UMAAL r9, r12, %[a], r2\n\t" + "UMAAL r8, r11, r1, r1\n\t" + "UMULL r9, r10, r0, r3\n\t" + "UMAAL r9, r12, r1, r2\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STRD r8, r9, [sp, #8]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r10, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r3\n\t" + "UMAAL r9, r10, r0, r4\n\t" + "UMAAL r9, r12, r1, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r2, r2\n\t" "STR r9, [sp, #16]\n\t" - "UMULL r9, r8, %[r], r5\n\t" - "UMAAL r9, r12, %[a], r4\n\t" + "UMULL r9, r8, r0, r5\n\t" + "UMAAL r9, r12, r1, r4\n\t" "UMAAL r9, r10, r2, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STR r9, [sp, #20]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r8, %[r], r6\n\t" - "UMAAL r9, r12, %[a], r5\n\t" + "UMAAL r9, r8, r0, r6\n\t" + "UMAAL r9, r12, r1, r5\n\t" "UMAAL r9, r10, r2, r4\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r3, r3\n\t" "STR r9, [sp, #24]\n\t" - "UMULL %[r], r9, %[r], r7\n\t" - "UMAAL %[r], r8, %[a], r6\n\t" - "UMAAL %[r], r12, r2, r5\n\t" - "UMAAL %[r], 
r10, r3, r4\n\t" - "ADCS %[r], %[r], %[r]\n\t" - "UMAAL %[r], r11, lr, lr\n\t" + "UMULL r0, r9, r0, r7\n\t" + "UMAAL r0, r8, r1, r6\n\t" + "UMAAL r0, r12, r2, r5\n\t" + "UMAAL r0, r10, r3, r4\n\t" + "ADCS r0, r0, r0\n\t" + "UMAAL r0, r11, lr, lr\n\t" /* R[7] = r0 */ - "UMAAL r9, r8, %[a], r7\n\t" + "UMAAL r9, r8, r1, r7\n\t" "UMAAL r9, r10, r2, r6\n\t" "UMAAL r12, r9, r3, r5\n\t" "ADCS r12, r12, r12\n\t" @@ -31816,10 +33641,10 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "ADCS r3, r2, r2\n\t" "UMAAL r10, r3, r5, r5\n\t" /* R[10] = r10 */ - "MOV %[a], lr\n\t" - "UMAAL %[a], r8, r4, r7\n\t" - "UMAAL %[a], r9, r5, r6\n\t" - "ADCS r4, %[a], %[a]\n\t" + "MOV r1, lr\n\t" + "UMAAL r1, r8, r4, r7\n\t" + "UMAAL r1, r9, r5, r6\n\t" + "ADCS r4, r1, r1\n\t" "UMAAL r3, r4, lr, lr\n\t" /* R[11] = r3 */ "UMAAL r8, r9, r5, r7\n\t" @@ -31838,7 +33663,7 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit /* R[15] = r7 */ "MOV lr, sp\n\t" "ADD lr, lr, #0x1c\n\t" - "STM lr!, {%[r], r12}\n\t" + "STM lr!, {r0, r12}\n\t" "STM lr!, {r11}\n\t" "STM lr!, {r10}\n\t" "STM lr!, {r3, r4, r8, r9}\n\t" @@ -31971,8 +33796,16 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #endif @@ -32079,16 +33912,22 @@ static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_256_cmp_8(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x1c\n\t" "\n" @@ -32221,12 +34060,18 @@ static sp_int32 sp_256_cmp_8(const sp_digit* a_p, const sp_digit* b_p) * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -32265,12 +34110,18 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_d * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -32321,11 +34172,17 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_d * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -32428,11 +34285,17 @@ static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
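The mp digit documented here is -m^-1 mod 2^32; the reduce routines consume it rather than compute it. For reference, it can be derived from the low limb of an odd modulus by Hensel lifting, each step doubling the number of correct low bits; a sketch with an illustrative name:

#include <stdint.h>

typedef uint32_t sp_digit;

/* mp = -(m0^-1) mod 2^32 for odd m0. x = m0 is already the inverse
 * mod 2^3 (odd squares are 1 mod 8); four Newton steps lift it past
 * 32 bits: 3 -> 6 -> 12 -> 24 -> 48 correct bits. */
static sp_digit mont_setup_model(sp_digit m0)
{
    sp_digit x = m0;
    x *= 2 - m0 * x;
    x *= 2 - m0 * x;
    x *= 2 - m0 * x;
    x *= 2 - m0 * x;
    return (sp_digit)0 - x;        /* negate mod 2^32 */
}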
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -32518,9 +34381,15 @@ static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -32658,8 +34527,16 @@ static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp : : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #ifdef WOLFSSL_SP_NO_UMAAL @@ -32669,11 +34546,17 @@ static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -32776,11 +34659,17 @@ static void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_di * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -32907,11 +34796,17 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV lr, #0x0\n\t" @@ -32953,7 +34848,11 @@ static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Double a Montgomery form number (r = a + a % m). @@ -32962,10 +34861,16 @@ static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a Number to double in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r2, #0x0\n\t" @@ -33003,7 +34908,11 @@ static void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Triple a Montgomery form number (r = a + a + a % m). @@ -33012,10 +34921,16 @@ static void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a Number to triple in Montgomery form. * m Modulus (prime). 
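The Montgomery add/dbl/tpl helpers in this hunk are ordinary modular additions; the unrolled code folds the correction into the shape of the P-256 prime rather than doing a full compare. A simplified generic sketch that corrects on carry-out only (the real routines also handle the no-carry-but-not-reduced case using the prime's structure):

#include <stdint.h>

typedef uint32_t sp_digit;

/* r = a + b, then subtract m once iff the addition carried out of
 * the top limb. Simplification: assumes m is close to 2^(32*n). */
static void mont_add_model(sp_digit* r, const sp_digit* a,
                           const sp_digit* b, const sp_digit* m, int n)
{
    uint64_t c = 0;
    int i;
    for (i = 0; i < n; i++) {
        c += (uint64_t)a[i] + b[i];
        r[i] = (sp_digit)c;
        c >>= 32;
    }
    sp_digit mask = (sp_digit)0 - (sp_digit)c;  /* all ones on carry */
    uint64_t bw = 0;
    for (i = 0; i < n; i++) {                   /* masked subtract of m */
        uint64_t d = (uint64_t)r[i] - (m[i] & mask) - bw;
        r[i] = (sp_digit)d;
        bw = (d >> 32) & 1;
    }
}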
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -33085,7 +35000,11 @@ static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3", "r12" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Subtract two Montgomery form numbers (r = a - b % m). @@ -33095,11 +35014,17 @@ static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * b Number to subtract with in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV lr, #0x0\n\t" @@ -33139,7 +35064,11 @@ static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) @@ -33148,11 +35077,17 @@ static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a Number to divide. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_256_mont_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r4, r5, r6, r7}\n\t" @@ -36706,9 +38641,15 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, * * a A single precision integer. 
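sp_256_mont_div2_8 halves a residue modulo the odd prime: when a is odd, a + m is even and (a + m)/2 is congruent to a/2 mod m, so a mask built from the low bit keeps the whole operation branch-free. In C, assuming 32-bit limbs and an illustrative name:

#include <stdint.h>

typedef uint32_t sp_digit;

/* r = a / 2 mod m, for odd m and a < m. */
static void div2_mod_model(sp_digit* r, const sp_digit* a,
                           const sp_digit* m, int n)
{
    sp_digit mask = (sp_digit)0 - (a[0] & 1); /* all ones when a is odd */
    uint64_t c = 0;
    int i;
    for (i = 0; i < n; i++) {                 /* r = a + (m & mask) */
        c += (uint64_t)a[i] + (m[i] & mask);
        r[i] = (sp_digit)c;
        c >>= 32;
    }
    for (i = 0; i < n - 1; i++)               /* shift right one bit */
        r[i] = (r[i] >> 1) | (r[i + 1] << 31);
    r[n - 1] = (r[n - 1] >> 1) | ((sp_digit)c << 31);
}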
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_add_one_8(sp_digit* a_p) +#else +static void sp_256_add_one_8(sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r1, r2, r3, r4}\n\t" @@ -37104,10 +39045,16 @@ int sp_ecc_secret_gen_256_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -37143,10 +39090,16 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -37179,11 +39132,17 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -37226,11 +39185,17 @@ static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. 
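sp_256_mul_d_8 scales a multi-precision value by one digit; the unrolled code is one UMULL followed by a UMAAL or UMLAL per limb. In C, assuming 32-bit limbs and an illustrative name:

#include <stdint.h>

typedef uint32_t sp_digit;

/* r (n+1 limbs) = a (n limbs) * b (single digit). */
static void mul_d_model(sp_digit* r, const sp_digit* a, sp_digit b,
                        int n)
{
    uint64_t c = 0;
    for (int i = 0; i < n; i++) {
        uint64_t t = (uint64_t)a[i] * b + c;
        r[i] = (sp_digit)t;
        c = t >> 32;
    }
    r[n] = (sp_digit)c;                       /* top limb */
}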
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -37290,11 +39255,17 @@ static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -37349,11 +39320,17 @@ static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -38058,11 +40035,17 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -38099,11 +40082,17 @@ static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -38129,10 +40118,16 @@ static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* } #endif /* WOLFSSL_SP_SMALL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_rshift1_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -38173,11 +40168,17 @@ static void sp_256_rshift1_8(sp_digit* r_p, const sp_digit* a_p) * a Number to divide. * m Modulus. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_div2_mod_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -38237,9 +40238,15 @@ static void sp_256_div2_mod_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static int sp_256_num_bits_8(const sp_digit* a_p) +#else +static int sp_256_num_bits_8(const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR r1, [%[a], #28]\n\t" @@ -39439,11 +41446,17 @@ static const sp_digit p384_b[12] = { * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x60\n\t" @@ -39516,11 +41529,17 @@ static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x30\n\t" @@ -40538,10 +42557,16 @@ static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x60\n\t" @@ -40638,10 +42663,16 @@ static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x30\n\t" @@ -41335,18 +43366,24 @@ static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. 
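sp_384_mul_12 and sp_384_sqr_12 each appear twice (a compact variant and a larger unrolled one, selected elsewhere by build options), but both accumulate 32x32->64 partial products column by column. A portable sketch of that product-scanning shape, with illustrative names and a generic width:

#include <stdint.h>

/* Product-scanning multiply: for each result column k, sum all partial
 * products a[i]*b[k-i] into a three-word accumulator (lo, hi, ov), then
 * emit the low word and shift the accumulator down one word. */
static void mul_n(uint32_t* r, const uint32_t* a, const uint32_t* b, int n)
{
    uint32_t lo = 0, hi = 0, ov = 0;
    int i, k;
    for (k = 0; k <= 2 * n - 2; k++) {
        i = (k < n) ? 0 : (k - n + 1);
        for (; (i <= k) && (i < n); i++) {
            uint64_t p = (uint64_t)a[i] * b[k - i];
            uint64_t s = (uint64_t)lo + (uint32_t)p;
            lo = (uint32_t)s;
            s = (uint64_t)hi + (uint32_t)(p >> 32) + (uint32_t)(s >> 32);
            hi = (uint32_t)s;
            ov += (uint32_t)(s >> 32);
        }
        r[k] = lo;
        lo = hi;
        hi = ov;
        ov = 0;
    }
    r[2 * n - 1] = lo;   /* top column; an n x n product cannot overflow it */
}

The multiply-accumulate instructions in the assembly (UMAAL where available, per the WOLFSSL_SP_NO_UMAAL split seen later in this file) fuse the multiply and the accumulator updates that this C spells out.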
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x30\n\t" "\n" "L_sp_384_add_12_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -41377,11 +43414,17 @@ static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -41721,12 +43764,18 @@ static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -41765,12 +43814,18 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * b A single precision number to subtract. * m Mask value to apply. 
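The looping sp_384_add_12 variant also picks up a correctness fix: the immediate in the carry-restore instruction changes from the ill-formed #0x-1 spelling to #0xffffffff (the same fix recurs in the cond_add and sp_521_add_17 hunks below). The idiom is worth a note: the loop's compare-and-branch clobbers the flags, so the carry is parked in r3 as 0 or 1, and ADDS r3, r3, #0xffffffff regenerates the C flag (carry out exactly when r3 was 1) before the next ADCS chain. The C equivalent simply keeps the carry in a variable:

#include <stdint.h>

/* Multi-word add with an explicit carry variable, mirroring how the asm
 * parks the carry in a register across flag-clobbering loop control. */
static uint32_t add_n(uint32_t* r, const uint32_t* a, const uint32_t* b, int n)
{
    uint32_t c = 0;   /* saved carry, always 0 or 1 */
    int i;
    for (i = 0; i < n; i++) {
        uint64_t s = (uint64_t)a[i] + b[i] + c;
        r[i] = (uint32_t)s;
        c = (uint32_t)(s >> 32);
    }
    return c;
}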
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -41834,11 +43889,17 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -41973,11 +44034,17 @@ static void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p, sp_digit m * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -42225,16 +44292,22 @@ static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. 
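sp_384_cond_sub_12 subtracts b under a mask m that is all-ones to subtract and zero to leave a unchanged, so the same instruction sequence executes either way; that is what keeps it constant-time. A sketch of the computation (the 0-or-all-ones return value mirrors what a final SBC of a register with itself would produce, which is an assumption about the elided body):

#include <stdint.h>

/* r = a - (b & m) over n words; m is 0 or 0xffffffff. Returns 0 when
 * there is no borrow, 0xffffffff when there is. */
static uint32_t cond_sub_n(uint32_t* r, const uint32_t* a,
                           const uint32_t* b, uint32_t m, int n)
{
    uint32_t borrow = 0;
    int i;
    for (i = 0; i < n; i++) {
        uint64_t d = (uint64_t)a[i] - (b[i] & m) - borrow;
        r[i] = (uint32_t)d;
        borrow = (uint32_t)(d >> 32) & 1;
    }
    return (uint32_t)0 - borrow;
}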
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_384_cmp_12(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_384_cmp_12(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x2c\n\t" "\n" @@ -42449,12 +44522,18 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ sp_digit o; @@ -42468,11 +44547,17 @@ static void sp_384_mont_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a Number to double in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_dbl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ sp_digit o; @@ -42486,11 +44571,17 @@ static void sp_384_mont_dbl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a Number to triple in Montgomery form. * m Modulus (prime). 
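sp_384_cmp_12 is likewise constant-time: the two fixed masks loaded by the MOV instructions (now spelled #0xffffffff rather than the invalid #0x-1) drive a branch-free scan from the most significant word down, visiting every word regardless of where the first difference lies. A C model of the idea, with illustrative names:

#include <stdint.h>

/* Branch-free compare: examine every word from the top down and latch the
 * first difference; returns -1, 0 or 1. */
static int32_t cmp_n(const uint32_t* a, const uint32_t* b, int n)
{
    int32_t r = 0;
    int i;
    for (i = n - 1; i >= 0; i--) {
        int32_t d = (int32_t)(a[i] > b[i]) - (int32_t)(a[i] < b[i]);
        /* mask is all-ones while r is still 0, all-zero afterwards */
        int32_t mask = (int32_t)(((uint32_t)(r | -r)) >> 31) - 1;
        r |= mask & d;
    }
    return r;
}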
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_tpl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ sp_digit o; @@ -42507,11 +44598,17 @@ static void sp_384_mont_tpl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -42548,11 +44645,17 @@ static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -42594,12 +44697,18 @@ static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit * b A single precision number to add. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -42607,7 +44716,7 @@ static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_ "MOV r4, #0x0\n\t" "\n" "L_sp_384_cond_add_12_words_%=:\n\t" - "ADDS r5, r5, #0x-1\n\t" + "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" "AND r7, r7, %[m]\n\t" @@ -42638,12 +44747,18 @@ static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * b A single precision number to add. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -42705,12 +44820,18 @@ static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * b Number to subtract with in Montgomery form. * m Modulus (prime). 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ sp_digit o; @@ -42721,10 +44842,16 @@ static void sp_384_mont_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi #ifdef WOLFSSL_SP_SMALL #else #endif /* WOLFSSL_SP_SMALL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_rshift1_12(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3}\n\t" @@ -46360,9 +48487,15 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, * * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_add_one_12(sp_digit* a_p) +#else +static void sp_384_add_one_12(sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r1, r2, r3, r4}\n\t" @@ -46764,10 +48897,16 @@ int sp_ecc_secret_gen_384_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -46803,10 +48942,16 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer and result. * b A single precision integer. 
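sp_384_rshift1_12 halves a multi-word value: each destination word takes 31 bits from its own source word and one bit funnelled down from the next word above it. The portable equivalent:

#include <stdint.h>

/* Right shift an n-word number by one bit. */
static void rshift1_n(uint32_t* r, const uint32_t* a, int n)
{
    int i;
    for (i = 0; i < n - 1; i++)
        r[i] = (a[i] >> 1) | (a[i + 1] << 31);
    r[n - 1] = a[n - 1] >> 1;
}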
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -46846,11 +48991,17 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -46893,11 +49044,17 @@ static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -46977,11 +49134,17 @@ static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -47036,11 +49199,17 @@ static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -47715,11 +49884,17 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W * a Number to divide. * m Modulus. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_div2_mod_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r4}\n\t" @@ -47817,9 +49992,15 @@ static void sp_384_div2_mod_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static int sp_384_num_bits_12(const sp_digit* a_p) +#else +static int sp_384_num_bits_12(const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR r1, [%[a], #44]\n\t" @@ -49117,11 +51298,17 @@ static const sp_digit p521_b[17] = { * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x88\n\t" @@ -49197,11 +51384,17 @@ static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * a A single precision integer. * b A single precision integer. 
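sp_384_div2_mod_12 (and the 521-bit version later) halves a value modulo an odd prime: when a is odd it first adds m, making the sum even, then shifts right by one bit with the add's carry becoming the new top bit. A compact model, assuming n-word operands and odd m:

#include <stdint.h>

/* r = a/2 mod m for odd m: add m when a is odd, then shift right one. */
static void div2_mod_n(uint32_t* r, const uint32_t* a,
                       const uint32_t* m, int n)
{
    uint32_t mask = (uint32_t)0 - (a[0] & 1);   /* all-ones iff a is odd */
    uint32_t c = 0;
    int i;
    for (i = 0; i < n; i++) {
        uint64_t s = (uint64_t)a[i] + (m[i] & mask) + c;
        r[i] = (uint32_t)s;
        c = (uint32_t)(s >> 32);
    }
    for (i = 0; i < n - 1; i++)
        r[i] = (r[i] >> 1) | (r[i + 1] << 31);
    r[n - 1] = (r[n - 1] >> 1) | (c << 31);
}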
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -51233,10 +53426,16 @@ static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x88\n\t" @@ -51336,10 +53535,16 @@ static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -52617,18 +54822,24 @@ static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x40\n\t" "\n" "L_sp_521_add_17_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -52644,7 +54855,7 @@ static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit #else "BNE.N L_sp_521_add_17_word_%=\n\t" #endif - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a], {r4}\n\t" "LDM %[b], {r8}\n\t" "ADCS r4, r4, r8\n\t" @@ -52665,11 +54876,17 @@ static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -52936,12 +55153,18 @@ static int sp_521_point_to_ecc_point_17(const sp_point_521* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -52980,12 +55203,18 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_ * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -53065,9 +55294,15 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_ * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -53177,8 +55412,16 @@ static void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p, sp_digit m : : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #ifdef WOLFSSL_SP_NO_UMAAL @@ -53188,11 +55431,17 @@ static void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p, sp_digit m * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -53448,11 +55697,17 @@ static void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_d * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -53803,16 +56058,22 @@ static void sp_521_mont_inv_17(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_521_cmp_17(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_521_cmp_17(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x40\n\t" "\n" @@ -54082,11 +56343,17 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_521_mont_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" @@ -54153,7 +56420,11 @@ static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Double a Montgomery form number (r = a + a % m). @@ -54162,10 +56433,16 @@ static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a Number to double in Montgomery form. * m Modulus (prime). 
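The two sp_521_mont_reduce_order_17 variants (with and without UMAAL, per the WOLFSSL_SP_NO_UMAAL split) both implement word-serial Montgomery reduction: each round multiplies the current low word by mp, the negated inverse of m mod 2^32, and adds that multiple of m so the word cancels. A sketch under the assumption of 32-bit digits, leaving out the final conditional subtraction of m:

#include <stdint.h>

/* Word-serial Montgomery reduction: after n rounds a[0..n-1] are zero and
 * the (still unreduced) result sits in a[n..2n-1] plus the returned carry. */
static uint32_t mont_reduce_n(uint32_t* a, const uint32_t* m,
                              uint32_t mp, int n)
{
    uint32_t c = 0;
    int i, j;
    for (i = 0; i < n; i++) {
        uint32_t mu = a[i] * mp;          /* truncating, mod 2^32 */
        uint64_t t = 0;
        for (j = 0; j < n; j++) {         /* a[i..i+n] += mu * m  */
            t += (uint64_t)mu * m[j] + a[i + j];
            a[i + j] = (uint32_t)t;
            t >>= 32;
        }
        t += (uint64_t)a[i + n] + c;
        a[i + n] = (uint32_t)t;
        c = (uint32_t)(t >> 32);
    }
    return c;
}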
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_dbl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_521_mont_dbl_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r2, #0x0\n\t" @@ -54223,7 +56500,11 @@ static void sp_521_mont_dbl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Triple a Montgomery form number (r = a + a + a % m). @@ -54232,10 +56513,16 @@ static void sp_521_mont_dbl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a Number to triple in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r2, #0x0\n\t" @@ -54313,7 +56600,11 @@ static void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Subtract two Montgomery form numbers (r = a - b % m). @@ -54323,11 +56614,17 @@ static void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi * b Number to subtract with in Montgomery form. * m Modulus (prime). 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" @@ -54395,13 +56692,23 @@ static void sp_521_mont_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_rshift1_17(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_521_rshift1_17(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3}\n\t" @@ -58679,9 +60986,15 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, * * a A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_add_one_17(sp_digit* a_p) +#else +static void sp_521_add_one_17(sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r1, r2, r3, r4}\n\t" @@ -59085,11 +61398,17 @@ int sp_ecc_secret_gen_521_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, #endif /* HAVE_ECC_DHE */ #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_rshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_521_rshift_17(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x20\n\t" @@ -59184,11 +61503,17 @@ static void sp_521_rshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_lshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_521_lshift_17(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x1f\n\t" @@ -59300,11 +61625,17 @@ static void sp_521_lshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_lshift_34(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_521_lshift_34(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x1f\n\t" @@ -59524,10 +61855,16 @@ static void sp_521_lshift_34(sp_digit* r_p, const sp_digit* a_p, byte n_p) * a A single precision integer. * b A single precision integer. 
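sp_521_rshift_17 and the two lshift helpers are funnel shifts: RSB r7, %[n], #0x20 computes 32 - n once, and every output word combines bits from two adjacent input words. The C equivalent below is valid for 0 < n < 32 (register-specified shifts of 32 yield 0 on ARM, so the assembly additionally tolerates n = 0, where the C form would be undefined):

#include <stdint.h>

/* Right shift a multi-word value by 1..31 bits. */
static void rshift_n(uint32_t* r, const uint32_t* a,
                     unsigned n, int words)
{
    unsigned inv = 32u - n;               /* the RSB result */
    int i;
    for (i = 0; i < words - 1; i++)
        r[i] = (a[i] >> n) | (a[i + 1] << inv);
    r[words - 1] = a[words - 1] >> n;
}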
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -59568,10 +61905,16 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -59622,11 +61965,17 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -59669,11 +62018,17 @@ static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -59778,11 +62133,17 @@ static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -59837,11 +62198,17 @@ static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -60547,11 +62914,17 @@ int sp_ecc_sign_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -60593,11 +62966,17 @@ static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -60647,11 +63026,17 @@ static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a Number to divide. * m Modulus. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_div2_mod_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_521_div2_mod_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r4}\n\t" @@ -60784,9 +63169,15 @@ static void sp_521_div2_mod_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static int sp_521_num_bits_17(const sp_digit* a_p) +#else +static int sp_521_num_bits_17(const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR r1, [%[a], #64]\n\t" @@ -62032,11 +64423,17 @@ typedef struct sp_point_1024 { * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mul_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x40\n\t" @@ -63834,10 +66231,16 @@ static void sp_1024_mul_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_sqr_16(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x40\n\t" @@ -64981,11 +67384,17 @@ static void sp_1024_sqr_16(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. 
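sp_521_num_bits_17 starts from the top word (the LDR of [%[a], #64] is a[16]) and returns the index of the highest set bit plus one. A portable sketch using a plain bit loop where the hardware would use a count-leading-zeros style sequence:

#include <stdint.h>

/* Bit length of an n-word number; 0 for zero. */
static int num_bits_n(const uint32_t* a, int n)
{
    int i, b;
    for (i = n - 1; i >= 0; i--) {
        if (a[i] != 0) {
            for (b = 32; (a[i] & (1u << (b - 1))) == 0; b--)
                ;
            return 32 * i + b;
        }
    }
    return 0;
}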
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -65030,10 +67439,16 @@ static sp_digit sp_1024_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -65106,11 +67521,17 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -65253,11 +67674,17 @@ SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_sub_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -65339,11 +67766,17 @@ SP_NOINLINE static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) * a A single precision integer. 
* b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" @@ -65414,10 +67847,16 @@ static void sp_1024_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_sqr_32(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" @@ -65600,10 +68039,16 @@ static const sp_point_1024 p1024_base = { * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -65643,12 +68088,18 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -65687,12 +68138,18 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. 
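 *
 * The mask m is expected to be all-one or all-zero bits, so b[i] & m is
 * either b[i] or 0 and the subtraction happens (or not) without a
 * data-dependent branch. A C sketch of the operation:
 *
 *   sp_digit c = 0, d, t;
 *   int i;
 *   for (i = 0; i < 32; i++) {
 *       d = b[i] & m;
 *       t = a[i] - d - c;
 *       c = (a[i] < d) | ((a[i] == d) & c);   // borrow out of this word
 *       r[i] = t;
 *   }
 *   return c;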
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -65824,18 +68281,24 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x80\n\t" "\n" "L_sp_1024_add_32_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -65867,11 +68330,17 @@ static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -65914,11 +68383,17 @@ static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. 
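 *
 * Multiplies 32 words by one digit, so the result occupies 33 words. A C
 * sketch, assuming an unsigned 64-bit type (here called sp_uint64) is
 * available; the assembly forms the same 64-bit partial products with
 * UMULL:
 *
 *   sp_uint64 t = 0;
 *   int i;
 *   for (i = 0; i < 32; i++) {
 *       t += (sp_uint64)a[i] * b;
 *       r[i] = (sp_digit)t;          // low word of the running sum
 *       t >>= 32;                    // keep the carry for the next word
 *   }
 *   r[32] = (sp_digit)t;             // top carry word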
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -66098,11 +68573,17 @@ static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -66157,11 +68638,17 @@ static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -66251,16 +68738,22 @@ static void sp_1024_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. 
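 *
 * The comparison is branch-free: words are scanned from the most
 * significant end, and a mask that starts all-ones (note the former
 * #0x-1 immediates, now #0xffffffff) is cleared at the first difference
 * so later words cannot contribute. A C sketch of the idea:
 *
 *   sp_int32 res = 0;
 *   sp_digit mask = (sp_digit)-1;
 *   int i;
 *   for (i = 31; i >= 0; i--) {
 *       res |= (sp_int32)(((a[i] > b[i]) - (a[i] < b[i])) & mask);
 *       mask &= (sp_digit)0 - (sp_digit)(a[i] == b[i]);
 *   }
 *   return res;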
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_1024_cmp_32(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_1024_cmp_32(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x7c\n\t" "\n" @@ -66977,11 +69470,17 @@ static int sp_1024_point_to_ecc_point_32(const sp_point_1024* p, ecc_point* pm) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -67281,11 +69780,17 @@ static void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -67633,12 +70138,18 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, * b Second number to add in Montgomery form. * m Modulus (prime). 
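 *
 * Mathematically this is: add, then subtract the modulus at most once,
 * since a + b < 2m. A C-level sketch in terms of helpers defined in this
 * file (sketch only; the assembly below interleaves these steps):
 *
 *   sp_digit o = sp_1024_add_32(r, a, b);              // carry out
 *   sp_digit mask = (sp_digit)0 - (o | (sp_1024_cmp_32(r, m) >= 0));
 *   (void)sp_1024_cond_sub_32(r, r, m, mask);          // reduce into [0, m)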
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -67805,11 +70316,17 @@ static void sp_1024_mont_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * a Number to double in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_dbl_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -67960,11 +70477,17 @@ static void sp_1024_mont_dbl_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * a Number to triple in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_tpl_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -68271,12 +70794,18 @@ static void sp_1024_mont_tpl_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * b Number to subtract with in Montgomery form. * m Modulus (prime). 
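 *
 * Mathematically: subtract, then add the modulus back when the subtract
 * borrowed (i.e. when a < b). A sketch in C, where sub_32 stands for a
 * plain 32-word subtract (illustrative name) and sp_1024_cond_add_32 is
 * the routine defined below:
 *
 *   sp_digit o = sub_32(r, a, b);                        // borrow out
 *   (void)sp_1024_cond_add_32(r, r, m, (sp_digit)0 - o); // back into [0, m)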
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r4, r5, r6, r7}\n\t" @@ -68440,12 +70969,18 @@ static void sp_1024_mont_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * b A single precision number to add. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -68453,7 +70988,7 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp "MOV r4, #0x0\n\t" "\n" "L_sp_1024_cond_add_32_words_%=:\n\t" - "ADDS r5, r5, #0x-1\n\t" + "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" "AND r7, r7, %[m]\n\t" @@ -68484,12 +71019,18 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. 
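 *
 * As with the conditional subtract, m is all-one or all-zero bits, so the
 * addend is either b or 0 with no data-dependent branch. A C sketch:
 *
 *   sp_digit c = 0, d, t;
 *   int i;
 *   for (i = 0; i < 32; i++) {
 *       d = b[i] & m;
 *       t = a[i] + d + c;
 *       c = (t < a[i]) | ((t == a[i]) & c);   // carry out of this word
 *       r[i] = t;
 *   }
 *   return c;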
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -68614,10 +71155,16 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp } #endif /* WOLFSSL_SP_SMALL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_rshift1_32(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_1024_rshift1_32(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3}\n\t" diff --git a/wolfcrypt/src/sp_x86_64_asm.S b/wolfcrypt/src/sp_x86_64_asm.S index 4c423bc82..6879391de 100644 --- a/wolfcrypt/src/sp_x86_64_asm.S +++ b/wolfcrypt/src/sp_x86_64_asm.S @@ -1,5 +1,5 @@ -/* sp_x86_64_asm - * +/* sp_x86_64_asm.S */ +/* * Copyright (C) 2006-2023 wolfSSL Inc. * * This file is part of wolfSSL. diff --git a/wolfcrypt/src/sp_x86_64_asm.asm b/wolfcrypt/src/sp_x86_64_asm.asm index 9e7523d7b..fa6558cc9 100644 --- a/wolfcrypt/src/sp_x86_64_asm.asm +++ b/wolfcrypt/src/sp_x86_64_asm.asm @@ -1,5 +1,5 @@ -; /* sp_x86_64_asm -; * +; /* sp_x86_64_asm.asm */ +; /* ; * Copyright (C) 2006-2023 wolfSSL Inc. ; * ; * This file is part of wolfSSL.
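
Note on the immediate operands touched above: the #0x-1 spelling of -1 is
not portable across assemblers, so the generated code now writes the
all-ones value explicitly as #0xffffffff. The idiom it appears in turns a
carry digit held in a register (0 or 1) back into the processor carry
flag; as C inline-assembly strings in the style of these files:

    "MOV r3, #0x0\n\t"             /* r3: running carry, 0 or 1 */
    "ADDS r3, r3, #0xffffffff\n\t" /* sets the C flag exactly when r3 != 0 */
    "ADCS r4, r4, r8\n\t"          /* the word-wise add resumes with it */

In sp_1024_cmp_32 the same constant simply materializes the all-ones mask
used by the branch-free compare.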