From cefe108cab5e2c6784cd29af39188c4db632056e Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Fri, 3 Nov 2023 08:45:10 +1000 Subject: [PATCH] Thumb2 inline ASM: IAR doesn't do register variables IAR doesn't parse register variable declarations with specified registers. IAR doesn't even honor the register keyword. A small negative value can be used, but IAR doesn't accept it; specify the positive value instead. Add a small code-size version of mont_reduce_full using umlal and umaal. Use the keyword '__asm__' instead of 'asm' in register variable declarations. Explicitly disable inlining of some functions when compiling with IAR. --- wolfcrypt/src/port/arm/thumb2-aes-asm_c.c | 193 +- wolfcrypt/src/port/arm/thumb2-curve25519_c.c | 397 +- wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c | 15 +- wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c | 15 +- wolfcrypt/src/sp_cortexm.c | 4409 ++++++++++++++---- wolfcrypt/src/sp_x86_64_asm.S | 4 +- wolfcrypt/src/sp_x86_64_asm.asm | 4 +- 7 files changed, 3956 insertions(+), 1081 deletions(-) diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c index 1564a6f9d..48b5edc16 100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c @@ -44,6 +44,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -198,12 +199,18 @@ static const uint32_t* L_AES_Thumb2_te = L_AES_Thumb2_te_data; #endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT void AES_invert_key(unsigned char* ks, word32 rounds); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_invert_key(unsigned char* ks_p, word32 rounds_p) +#else +void AES_invert_key(unsigned char* ks, word32 rounds) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register unsigned char* ks asm ("r0") = (unsigned char*)ks_p; - register word32 rounds asm ("r1") = (word32)rounds_p; - register uint32_t* L_AES_Thumb2_te_c asm ("r2") = (uint32_t*)L_AES_Thumb2_te; - register uint32_t* L_AES_Thumb2_td_c asm ("r3") = (uint32_t*)L_AES_Thumb2_td; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register unsigned char* ks __asm__ ("r0") = (unsigned char*)ks_p; + register word32 rounds __asm__ ("r1") = (word32)rounds_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_c __asm__ ("r2") = (uint32_t*)L_AES_Thumb2_te; + register uint32_t* L_AES_Thumb2_td_c __asm__ ("r3") = (uint32_t*)L_AES_Thumb2_td; __asm__ __volatile__ ( "MOV r12, %[L_AES_Thumb2_te]\n\t" @@ -314,13 +321,19 @@ static const uint32_t L_AES_Thumb2_rcon[] = { void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char* ks_p) +#else +void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* key asm ("r0") = (const unsigned char*)key_p; - register word32 len asm ("r1") = (word32)len_p; - register unsigned char* ks asm ("r2") = (unsigned char*)ks_p; - register uint32_t* L_AES_Thumb2_te_c asm ("r3") = (uint32_t*)L_AES_Thumb2_te; - register uint32_t* L_AES_Thumb2_rcon_c asm ("r4") = (uint32_t*)&L_AES_Thumb2_rcon; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* key __asm__ ("r0") = (const unsigned char*)key_p; + register word32 len __asm__ ("r1") = (word32)len_p; + 
register unsigned char* ks __asm__ ("r2") = (unsigned char*)ks_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_c __asm__ ("r3") = (uint32_t*)L_AES_Thumb2_te; + register uint32_t* L_AES_Thumb2_rcon_c __asm__ ("r4") = (uint32_t*)&L_AES_Thumb2_rcon; __asm__ __volatile__ ( "MOV r8, %[L_AES_Thumb2_te]\n\t" @@ -534,12 +547,18 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char } void AES_encrypt_block(const uint32_t* te, int nr, int len, const uint32_t* ks); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, const uint32_t* ks_p) +#else +void AES_encrypt_block(const uint32_t* te, int nr, int len, const uint32_t* ks) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const uint32_t* te asm ("r0") = (const uint32_t*)te_p; - register int nr asm ("r1") = (int)nr_p; - register int len asm ("r2") = (int)len_p; - register const uint32_t* ks asm ("r3") = (const uint32_t*)ks_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const uint32_t* te __asm__ ("r0") = (const uint32_t*)te_p; + register int nr __asm__ ("r1") = (int)nr_p; + register int len __asm__ ("r2") = (int)len_p; + register const uint32_t* ks __asm__ ("r3") = (const uint32_t*)ks_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n" @@ -762,14 +781,20 @@ static const uint32_t* L_AES_Thumb2_te_ecb = L_AES_Thumb2_te_data; #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) +#else +void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register uint32_t* L_AES_Thumb2_te_ecb_c asm ("r5") = (uint32_t*)L_AES_Thumb2_te_ecb; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_ecb_c __asm__ ("r5") = (uint32_t*)L_AES_Thumb2_te_ecb; __asm__ __volatile__ ( "MOV lr, %[in]\n\t" @@ -918,15 +943,21 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #ifdef HAVE_AES_CBC void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) +#else +void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) +#endif /* 
!WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register unsigned char* iv asm ("r5") = (unsigned char*)iv_p; - register uint32_t* L_AES_Thumb2_te_ecb_c asm ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; + register unsigned char* iv __asm__ ("r5") = (unsigned char*)iv_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_ecb_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; __asm__ __volatile__ ( "MOV r8, r4\n\t" @@ -1090,15 +1121,21 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #ifdef WOLFSSL_AES_COUNTER void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) +#else +void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p; - register uint32_t* L_AES_Thumb2_te_ecb_c asm ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; + register unsigned char* ctr __asm__ ("r5") = (unsigned char*)ctr_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_ecb_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; __asm__ __volatile__ ( "MOV r12, r4\n\t" @@ -1283,11 +1320,17 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #ifdef HAVE_AES_DECRYPT #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) void AES_decrypt_block(const uint32_t* td, int nr, const uint8_t* td4); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_decrypt_block(const uint32_t* td_p, int nr_p, const uint8_t* td4_p) +#else +void AES_decrypt_block(const uint32_t* td, int nr, const uint8_t* td4) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const uint32_t* td asm ("r0") = (const uint32_t*)td_p; - register int nr asm ("r1") = (int)nr_p; - register const uint8_t* td4 asm ("r2") = 
(const uint8_t*)td4_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const uint32_t* td __asm__ ("r0") = (const uint32_t*)td_p; + register int nr __asm__ ("r1") = (int)nr_p; + register const uint8_t* td4 __asm__ ("r2") = (const uint8_t*)td4_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n" @@ -1543,15 +1586,21 @@ static const unsigned char L_AES_Thumb2_td4[] = { #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) +#else +void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register uint32_t* L_AES_Thumb2_td_ecb_c asm ("r5") = (uint32_t*)L_AES_Thumb2_td_ecb; - register unsigned char* L_AES_Thumb2_td4_c asm ("r6") = (unsigned char*)&L_AES_Thumb2_td4; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_td_ecb_c __asm__ ("r5") = (uint32_t*)L_AES_Thumb2_td_ecb; + register unsigned char* L_AES_Thumb2_td4_c __asm__ ("r6") = (unsigned char*)&L_AES_Thumb2_td4; __asm__ __volatile__ ( "MOV r8, r4\n\t" @@ -1697,16 +1746,22 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #ifdef HAVE_AES_CBC void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) +#else +void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register unsigned char* iv asm ("r5") = (unsigned char*)iv_p; - register uint32_t* L_AES_Thumb2_td_ecb_c asm ("r6") = (uint32_t*)L_AES_Thumb2_td_ecb; - register unsigned char* L_AES_Thumb2_td4_c asm ("r7") = (unsigned char*)&L_AES_Thumb2_td4; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks 
__asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; + register unsigned char* iv __asm__ ("r5") = (unsigned char*)iv_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_td_ecb_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_td_ecb; + register unsigned char* L_AES_Thumb2_td4_c __asm__ ("r7") = (unsigned char*)&L_AES_Thumb2_td4; __asm__ __volatile__ ( "MOV r8, r4\n\t" @@ -2033,13 +2088,19 @@ static const uint32_t L_GCM_gmult_len_r[] = { void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned char* data_p, unsigned long len_p) +#else +void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register unsigned char* x asm ("r0") = (unsigned char*)x_p; - register const unsigned char** m asm ("r1") = (const unsigned char**)m_p; - register const unsigned char* data asm ("r2") = (const unsigned char*)data_p; - register unsigned long len asm ("r3") = (unsigned long)len_p; - register uint32_t* L_GCM_gmult_len_r_c asm ("r4") = (uint32_t*)&L_GCM_gmult_len_r; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register unsigned char* x __asm__ ("r0") = (unsigned char*)x_p; + register const unsigned char** m __asm__ ("r1") = (const unsigned char**)m_p; + register const unsigned char* data __asm__ ("r2") = (const unsigned char*)data_p; + register unsigned long len __asm__ ("r3") = (unsigned long)len_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_GCM_gmult_len_r_c __asm__ ("r4") = (uint32_t*)&L_GCM_gmult_len_r; __asm__ __volatile__ ( "MOV lr, %[L_GCM_gmult_len_r]\n\t" @@ -2603,15 +2664,21 @@ void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned static const uint32_t* L_AES_Thumb2_te_gcm = L_AES_Thumb2_te_data; void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) +#else +void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r4") = (int)nr_p; - register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p; - register uint32_t* L_AES_Thumb2_te_gcm_c asm ("r6") = (uint32_t*)L_AES_Thumb2_te_gcm; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register int nr __asm__ ("r4") = (int)nr_p; + register unsigned char* ctr __asm__ ("r5") = (unsigned char*)ctr_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_AES_Thumb2_te_gcm_c __asm__ ("r6") = (uint32_t*)L_AES_Thumb2_te_gcm; 
__asm__ __volatile__ ( "MOV r12, r4\n\t" diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c index 2018b8e9d..edb2af068 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c +++ b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c @@ -44,6 +44,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -60,8 +61,14 @@ #if defined(HAVE_CURVE25519) || defined(HAVE_ED25519) #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_init() +#else +void fe_init() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n\t" : @@ -71,8 +78,14 @@ void fe_init() } void fe_add_sub_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_add_sub_op() +#else +void fe_add_sub_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Add-Sub */ "LDRD r4, r5, [r2]\n\t" @@ -168,8 +181,14 @@ void fe_add_sub_op() } void fe_sub_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sub_op() +#else +void fe_sub_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Sub */ "LDM r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" @@ -202,11 +221,17 @@ void fe_sub_op() ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sub(fe r_p, const fe a_p, const fe b_p) +#else +void fe_sub(fe r, const fe a, const fe b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; - register const sword32* b asm ("r2") = (const sword32*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; + register const sword32* b __asm__ ("r2") = (const sword32*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "BL fe_sub_op\n\t" @@ -217,8 +242,14 @@ void fe_sub(fe r_p, const fe a_p, const fe b_p) } void fe_add_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_add_op() +#else +void fe_add_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Add */ "LDM r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" @@ -251,11 +282,17 @@ void fe_add_op() ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_add(fe r_p, const fe a_p, const fe b_p) +#else +void fe_add(fe r, const fe a, const fe b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; - register const sword32* b asm ("r2") = (const sword32*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; + register const sword32* b __asm__ ("r2") = (const sword32*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "BL fe_add_op\n\t" @@ -266,10 +303,16 @@ void fe_add(fe r_p, const fe a_p, const fe b_p) } #ifdef HAVE_ED25519 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_frombytes(fe out_p, const unsigned char* in_p) +#else +void fe_frombytes(fe out, const unsigned char* in) +#endif /* 
!WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* out asm ("r0") = (sword32*)out_p; - register const unsigned char* in asm ("r1") = (const unsigned char*)in_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* out __asm__ ("r0") = (sword32*)out_p; + register const unsigned char* in __asm__ ("r1") = (const unsigned char*)in_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR r2, [%[in]]\n\t" @@ -295,10 +338,16 @@ void fe_frombytes(fe out_p, const unsigned char* in_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_tobytes(unsigned char* out_p, const fe n_p) +#else +void fe_tobytes(unsigned char* out, const fe n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register unsigned char* out asm ("r0") = (unsigned char*)out_p; - register const sword32* n asm ("r1") = (const sword32*)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register unsigned char* out __asm__ ("r0") = (unsigned char*)out_p; + register const sword32* n __asm__ ("r1") = (const sword32*)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" @@ -335,9 +384,15 @@ void fe_tobytes(unsigned char* out_p, const fe n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_1(fe n_p) +#else +void fe_1(fe n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* n asm ("r0") = (sword32*)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* n __asm__ ("r0") = (sword32*)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Set one */ @@ -356,9 +411,15 @@ void fe_1(fe n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_0(fe n_p) +#else +void fe_0(fe n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* n asm ("r0") = (sword32*)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* n __asm__ ("r0") = (sword32*)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Set zero */ @@ -377,10 +438,16 @@ void fe_0(fe n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_copy(fe r_p, const fe a_p) +#else +void fe_copy(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Copy */ @@ -398,10 +465,16 @@ void fe_copy(fe r_p, const fe a_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_neg(fe r_p, const fe a_p) +#else +void fe_neg(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MVN r7, #0x0\n\t" @@ -425,9 +498,15 @@ void fe_neg(fe r_p, const fe a_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int fe_isnonzero(const fe a_p) +#else +int fe_isnonzero(const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sword32* a asm ("r0") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sword32* a __asm__ ("r0") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" @@ -464,9 +543,15 @@ int fe_isnonzero(const fe a_p) return 
(uint32_t)(size_t)a; } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int fe_isnegative(const fe a_p) +#else +int fe_isnegative(const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sword32* a asm ("r0") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sword32* a __asm__ ("r0") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r2, r3, r4, r5}\n\t" @@ -492,11 +577,17 @@ int fe_isnegative(const fe a_p) #if defined(HAVE_ED25519_MAKE_KEY) || defined(HAVE_ED25519_SIGN) #ifndef WC_NO_CACHE_RESISTANT +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) +#else +void fe_cmov_table(fe* r, fe* base, signed char b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register fe* r asm ("r0") = (fe*)r_p; - register fe* base asm ("r1") = (fe*)base_p; - register signed char b asm ("r2") = (signed char)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register fe* r __asm__ ("r0") = (fe*)r_p; + register fe* base __asm__ ("r1") = (fe*)base_p; + register signed char b __asm__ ("r2") = (signed char)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SXTB %[b], %[b]\n\t" @@ -1469,11 +1560,17 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) +#else +void fe_cmov_table(fe* r, fe* base, signed char b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register fe* r asm ("r0") = (fe*)r_p; - register fe* base asm ("r1") = (fe*)base_p; - register signed char b asm ("r2") = (signed char)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register fe* r __asm__ ("r0") = (fe*)r_p; + register fe* base __asm__ ("r1") = (fe*)base_p; + register signed char b __asm__ ("r2") = (signed char)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SXTB %[b], %[b]\n\t" @@ -1579,8 +1676,14 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) #endif /* HAVE_ED25519 */ #ifdef WOLFSSL_SP_NO_UMAAL void fe_mul_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul_op() +#else +void fe_mul_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x28\n\t" "STR r0, [sp, #36]\n\t" @@ -1959,8 +2062,14 @@ void fe_mul_op() #else void fe_mul_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul_op() +#else +void fe_mul_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" "STRD r0, r1, [sp, #36]\n\t" @@ -2092,11 +2201,17 @@ void fe_mul_op() } #endif /* WOLFSSL_SP_NO_UMAAL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul(fe r_p, const fe a_p, const fe b_p) +#else +void fe_mul(fe r, const fe a, const fe b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; - register const sword32* b asm ("r2") = (const sword32*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; + register const sword32* b __asm__ ("r2") = (const sword32*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "BL fe_mul_op\n\t" @@ -2108,8 +2223,14 @@ void fe_mul(fe r_p, const fe a_p, const fe b_p) #ifdef WOLFSSL_SP_NO_UMAAL void fe_sq_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq_op() 
+#else +void fe_sq_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" "STR r0, [sp, #64]\n\t" @@ -2381,8 +2502,14 @@ void fe_sq_op() #else void fe_sq_op(void); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq_op() +#else +void fe_sq_op() +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x20\n\t" "STR r0, [sp, #28]\n\t" @@ -2500,10 +2627,16 @@ void fe_sq_op() } #endif /* WOLFSSL_SP_NO_UMAAL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq(fe r_p, const fe a_p) +#else +void fe_sq(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "BL fe_sq_op\n\t" @@ -2515,10 +2648,16 @@ void fe_sq(fe r_p, const fe a_p) #ifdef HAVE_CURVE25519 #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul121666(fe r_p, fe a_p) +#else +void fe_mul121666(fe r, fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register sword32* a asm ("r1") = (sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register sword32* a __asm__ ("r1") = (sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Multiply by 121666 */ @@ -2568,10 +2707,16 @@ void fe_mul121666(fe r_p, fe a_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_mul121666(fe r_p, fe a_p) +#else +void fe_mul121666(fe r, fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register sword32* a asm ("r1") = (sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register sword32* a __asm__ ("r1") = (sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Multiply by 121666 */ @@ -2609,11 +2754,17 @@ void fe_mul121666(fe r_p, fe a_p) #endif /* WOLFSSL_SP_NO_UMAAL */ #ifndef WC_NO_CACHE_RESISTANT +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int curve25519(byte* r_p, const byte* n_p, const byte* a_p) +#else +int curve25519(byte* r, const byte* n, const byte* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* r asm ("r0") = (byte*)r_p; - register const byte* n asm ("r1") = (const byte*)n_p; - register const byte* a asm ("r2") = (const byte*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* r __asm__ ("r0") = (byte*)r_p; + register const byte* n __asm__ ("r1") = (const byte*)n_p; + register const byte* a __asm__ ("r2") = (const byte*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0xbc\n\t" @@ -3039,11 +3190,17 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG int curve25519(byte* r_p, const byte* n_p, const byte* a_p) +#else +int curve25519(byte* r, const byte* n, const byte* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* r asm ("r0") = (byte*)r_p; - register const byte* n asm ("r1") = (const byte*)n_p; - register const byte* a asm ("r2") = (const byte*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* r __asm__ ("r0") = (byte*)r_p; + 
register const byte* n __asm__ ("r1") = (const byte*)n_p; + register const byte* a __asm__ ("r2") = (const byte*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0xc0\n\t" @@ -3387,10 +3544,16 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) #endif /* WC_NO_CACHE_RESISTANT */ #endif /* HAVE_CURVE25519 */ #ifdef HAVE_ED25519 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_invert(fe r_p, const fe a_p) +#else +void fe_invert(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x88\n\t" @@ -3590,10 +3753,16 @@ void fe_invert(fe r_p, const fe a_p) } #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq2(fe r_p, const fe a_p) +#else +void fe_sq2(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -3898,10 +4067,16 @@ void fe_sq2(fe r_p, const fe a_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_sq2(fe r_p, const fe a_p) +#else +void fe_sq2(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x24\n\t" @@ -4055,10 +4230,16 @@ void fe_sq2(fe r_p, const fe a_p) } #endif /* WOLFSSL_SP_NO_UMAAL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void fe_pow22523(fe r_p, const fe a_p) +#else +void fe_pow22523(fe r, const fe a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x68\n\t" @@ -4257,10 +4438,16 @@ void fe_pow22523(fe r_p, const fe a_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p) +#else +void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p2 * r asm ("r0") = (ge_p2 *)r_p; - register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p2 * r __asm__ ("r0") = (ge_p2 *)r_p; + register const ge_p1p1 * p __asm__ ("r1") = (const ge_p1p1 *)p_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x8\n\t" @@ -4287,10 +4474,16 @@ void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p) +#else +void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register 
ge_p3 * r asm ("r0") = (ge_p3 *)r_p; - register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p3 * r __asm__ ("r0") = (ge_p3 *)r_p; + register const ge_p1p1 * p __asm__ ("r1") = (const ge_p1p1 *)p_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x8\n\t" @@ -4322,10 +4515,16 @@ void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p) +#else +void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p2 * p asm ("r1") = (const ge_p2 *)p_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p2 * p __asm__ ("r1") = (const ge_p2 *)p_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x8\n\t" @@ -4369,11 +4568,17 @@ void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) +#else +void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_precomp * q __asm__ ("r2") = (const ge_precomp *)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0xc\n\t" @@ -4451,11 +4656,17 @@ void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) +#else +void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_precomp * q __asm__ ("r2") = (const ge_precomp *)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0xc\n\t" @@ -4534,11 +4745,17 @@ void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) +#else +void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_cached* q asm ("r2") = (const ge_cached*)q_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_cached* q __asm__ ("r2") = (const ge_cached*)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" @@ -4617,11 +4834,17 @@ void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void ge_sub(ge_p1p1 * r_p, const ge_p3 
* p_p, const ge_cached* q_p) +#else +void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_cached* q asm ("r2") = (const ge_cached*)q_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_cached* q __asm__ ("r2") = (const ge_cached*)q_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" @@ -4701,9 +4924,15 @@ void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) } #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_reduce(byte* s_p) +#else +void sc_reduce(byte* s) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* s asm ("r0") = (byte*)s_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* s __asm__ ("r0") = (byte*)s_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x38\n\t" @@ -5129,9 +5358,15 @@ void sc_reduce(byte* s_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_reduce(byte* s_p) +#else +void sc_reduce(byte* s) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* s asm ("r0") = (byte*)s_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* s __asm__ ("r0") = (byte*)s_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x38\n\t" @@ -5430,12 +5665,18 @@ void sc_reduce(byte* s_p) #endif /* WOLFSSL_SP_NO_UMAAL */ #ifdef HAVE_ED25519_SIGN #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) +#else +void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* s asm ("r0") = (byte*)s_p; - register const byte* a asm ("r1") = (const byte*)a_p; - register const byte* b asm ("r2") = (const byte*)b_p; - register const byte* c asm ("r3") = (const byte*)c_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* s __asm__ ("r0") = (byte*)s_p; + register const byte* a __asm__ ("r1") = (const byte*)a_p; + register const byte* b __asm__ ("r2") = (const byte*)b_p; + register const byte* c __asm__ ("r3") = (const byte*)c_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x50\n\t" @@ -6218,12 +6459,18 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) } #else +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) +#else +void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register byte* s asm ("r0") = (byte*)s_p; - register const byte* a asm ("r1") = (const byte*)a_p; - register const byte* b asm ("r2") = (const byte*)b_p; - register const byte* c asm ("r3") = (const byte*)c_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register byte* s __asm__ ("r0") = (byte*)s_p; + register const byte* a __asm__ ("r1") = (const byte*)a_p; + register const byte* b __asm__ ("r2") = (const byte*)b_p; + register const byte* c __asm__ ("r3") = (const byte*)c_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x50\n\t" diff --git a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c index 43659fb07..2483f036d 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c +++ 
b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c @@ -44,6 +44,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -73,12 +74,18 @@ static const uint32_t L_SHA256_transform_len_k[] = { }; void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) +#else +void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p; - register const byte* data asm ("r1") = (const byte*)data_p; - register word32 len asm ("r2") = (word32)len_p; - register uint32_t* L_SHA256_transform_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_len_k; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register wc_Sha256* sha256 __asm__ ("r0") = (wc_Sha256*)sha256_p; + register const byte* data __asm__ ("r1") = (const byte*)data_p; + register word32 len __asm__ ("r2") = (word32)len_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint32_t* L_SHA256_transform_len_k_c __asm__ ("r3") = (uint32_t*)&L_SHA256_transform_len_k; __asm__ __volatile__ ( "SUB sp, sp, #0xc0\n\t" diff --git a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c index d62a035cc..3dc2d1f20 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c @@ -44,6 +44,7 @@ #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __IAR_SYSTEMS_ICC__ */ #ifdef __KEIL__ #define __asm__ __asm @@ -97,12 +98,18 @@ static const uint64_t L_SHA512_transform_len_k[] = { }; void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) +#else +void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p; - register const byte* data asm ("r1") = (const byte*)data_p; - register word32 len asm ("r2") = (word32)len_p; - register uint64_t* L_SHA512_transform_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_len_k; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register wc_Sha512* sha512 __asm__ ("r0") = (wc_Sha512*)sha512_p; + register const byte* data __asm__ ("r1") = (const byte*)data_p; + register word32 len __asm__ ("r2") = (word32)len_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + register uint64_t* L_SHA512_transform_len_k_c __asm__ ("r3") = (uint64_t*)&L_SHA512_transform_len_k; __asm__ __volatile__ ( "SUB sp, sp, #0xc0\n\t" diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index 95c7820d7..a44fcfe51 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -240,17 +240,26 @@ static void sp_2048_to_bin_64(sp_digit* r, byte* a) #ifndef WOLFSSL_SP_SMALL #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x24\n\t" @@ -593,115 +602,129 @@ static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "STRD %[r], %[a], [sp, #36]\n\t" +#else + "STR %[r], [sp, #36]\n\t" + "STR %[a], [sp, #40]\n\t" +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[b]\n\t" - "LDM %[a], {%[r], %[a], %[b], r3}\n\t" + "LDM %[a], {r0, r1, r2, r3}\n\t" "LDM lr!, {r4, r5, r6}\n\t" - "UMULL r10, r11, %[r], r4\n\t" - "UMULL r12, r7, %[a], r4\n\t" - "UMAAL r11, r12, %[r], r5\n\t" - "UMULL r8, r9, %[b], r4\n\t" - "UMAAL r12, r8, %[a], r5\n\t" - "UMAAL r12, r7, %[r], r6\n\t" + "UMULL r10, r11, r0, r4\n\t" + "UMULL r12, r7, r1, r4\n\t" + "UMAAL r11, r12, r0, r5\n\t" + "UMULL r8, r9, r2, r4\n\t" + "UMAAL r12, r8, r1, r5\n\t" + "UMAAL r12, r7, r0, r6\n\t" "UMAAL r8, r9, r3, r4\n\t" "STM sp, {r10, r11, r12}\n\t" - "UMAAL r7, r8, %[b], r5\n\t" + "UMAAL r7, r8, r2, r5\n\t" "LDM lr!, {r4}\n\t" - "UMULL r10, r11, %[a], r6\n\t" - "UMAAL r8, r9, %[b], r6\n\t" - "UMAAL r7, r10, %[r], r4\n\t" + "UMULL r10, r11, r1, r6\n\t" + "UMAAL r8, r9, r2, r6\n\t" + "UMAAL r7, r10, r0, r4\n\t" "UMAAL r8, r11, r3, r5\n\t" "STR r7, [sp, #12]\n\t" - "UMAAL r8, r10, %[a], r4\n\t" + "UMAAL r8, r10, r1, r4\n\t" "UMAAL r9, r11, r3, r6\n\t" - "UMAAL r9, r10, %[b], r4\n\t" + "UMAAL r9, r10, r2, r4\n\t" "UMAAL r10, r11, r3, r4\n\t" "LDM lr, {r4, r5, r6, r7}\n\t" "MOV r12, #0x0\n\t" - "UMLAL r8, r12, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r4\n\t" - "UMAAL r10, r12, %[b], r4\n\t" + "UMLAL r8, r12, r0, r4\n\t" + "UMAAL r9, r12, r1, r4\n\t" + "UMAAL r10, r12, r2, r4\n\t" "UMAAL r11, r12, r3, r4\n\t" "MOV r4, #0x0\n\t" - "UMLAL r9, r4, %[r], r5\n\t" - "UMAAL r10, r4, %[a], r5\n\t" - "UMAAL r11, r4, %[b], r5\n\t" + "UMLAL r9, r4, r0, r5\n\t" + "UMAAL r10, r4, r1, r5\n\t" + "UMAAL r11, r4, r2, r5\n\t" "UMAAL r12, r4, r3, r5\n\t" "MOV r5, 
#0x0\n\t" - "UMLAL r10, r5, %[r], r6\n\t" - "UMAAL r11, r5, %[a], r6\n\t" - "UMAAL r12, r5, %[b], r6\n\t" + "UMLAL r10, r5, r0, r6\n\t" + "UMAAL r11, r5, r1, r6\n\t" + "UMAAL r12, r5, r2, r6\n\t" "UMAAL r4, r5, r3, r6\n\t" "MOV r6, #0x0\n\t" - "UMLAL r11, r6, %[r], r7\n\t" - "LDR %[r], [sp, #40]\n\t" - "UMAAL r12, r6, %[a], r7\n\t" - "ADD %[r], %[r], #0x10\n\t" - "UMAAL r4, r6, %[b], r7\n\t" + "UMLAL r11, r6, r0, r7\n\t" + "LDR r0, [sp, #40]\n\t" + "UMAAL r12, r6, r1, r7\n\t" + "ADD r0, r0, #0x10\n\t" + "UMAAL r4, r6, r2, r7\n\t" "SUB lr, lr, #0x10\n\t" "UMAAL r5, r6, r3, r7\n\t" - "LDM %[r], {%[r], %[a], %[b], r3}\n\t" + "LDM r0, {r0, r1, r2, r3}\n\t" "STR r6, [sp, #32]\n\t" "LDM lr!, {r6}\n\t" "MOV r7, #0x0\n\t" - "UMLAL r8, r7, %[r], r6\n\t" - "UMAAL r9, r7, %[a], r6\n\t" + "UMLAL r8, r7, r0, r6\n\t" + "UMAAL r9, r7, r1, r6\n\t" "STR r8, [sp, #16]\n\t" - "UMAAL r10, r7, %[b], r6\n\t" + "UMAAL r10, r7, r2, r6\n\t" "UMAAL r11, r7, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r8, #0x0\n\t" - "UMLAL r9, r8, %[r], r6\n\t" - "UMAAL r10, r8, %[a], r6\n\t" + "UMLAL r9, r8, r0, r6\n\t" + "UMAAL r10, r8, r1, r6\n\t" "STR r9, [sp, #20]\n\t" - "UMAAL r11, r8, %[b], r6\n\t" + "UMAAL r11, r8, r2, r6\n\t" "UMAAL r12, r8, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r9, #0x0\n\t" - "UMLAL r10, r9, %[r], r6\n\t" - "UMAAL r11, r9, %[a], r6\n\t" + "UMLAL r10, r9, r0, r6\n\t" + "UMAAL r11, r9, r1, r6\n\t" "STR r10, [sp, #24]\n\t" - "UMAAL r12, r9, %[b], r6\n\t" + "UMAAL r12, r9, r2, r6\n\t" "UMAAL r4, r9, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r10, #0x0\n\t" - "UMLAL r11, r10, %[r], r6\n\t" - "UMAAL r12, r10, %[a], r6\n\t" + "UMLAL r11, r10, r0, r6\n\t" + "UMAAL r12, r10, r1, r6\n\t" "STR r11, [sp, #28]\n\t" - "UMAAL r4, r10, %[b], r6\n\t" + "UMAAL r4, r10, r2, r6\n\t" "UMAAL r5, r10, r3, r6\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r12, r7, %[r], r11\n\t" - "UMAAL r4, r7, %[a], r11\n\t" + "UMAAL r12, r7, r0, r11\n\t" + "UMAAL r4, r7, r1, r11\n\t" "LDR r6, [sp, #32]\n\t" - "UMAAL r5, r7, %[b], r11\n\t" + "UMAAL r5, r7, r2, r11\n\t" "UMAAL r6, r7, r3, r11\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r4, r8, %[r], r11\n\t" - "UMAAL r5, r8, %[a], r11\n\t" - "UMAAL r6, r8, %[b], r11\n\t" + "UMAAL r4, r8, r0, r11\n\t" + "UMAAL r5, r8, r1, r11\n\t" + "UMAAL r6, r8, r2, r11\n\t" "UMAAL r7, r8, r3, r11\n\t" "LDM lr, {r11, lr}\n\t" - "UMAAL r5, r9, %[r], r11\n\t" - "UMAAL r6, r10, %[r], lr\n\t" - "UMAAL r6, r9, %[a], r11\n\t" - "UMAAL r7, r10, %[a], lr\n\t" - "UMAAL r7, r9, %[b], r11\n\t" - "UMAAL r8, r10, %[b], lr\n\t" + "UMAAL r5, r9, r0, r11\n\t" + "UMAAL r6, r10, r0, lr\n\t" + "UMAAL r6, r9, r1, r11\n\t" + "UMAAL r7, r10, r1, lr\n\t" + "UMAAL r7, r9, r2, r11\n\t" + "UMAAL r8, r10, r2, lr\n\t" "UMAAL r8, r9, r3, r11\n\t" "UMAAL r9, r10, r3, lr\n\t" "MOV r3, r12\n\t" @@ -725,11 +748,17 @@ static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -760,10 +789,16 @@ static sp_digit sp_2048_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_16(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -808,11 +843,17 @@ static sp_digit sp_2048_sub_in_place_16(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -922,10 +963,16 @@ SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -998,11 +1045,17 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -1144,10 +1197,16 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -1276,11 +1335,17 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -1474,15 +1539,24 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, } #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -1717,62 +1791,71 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x20\n\t" "STR %[r], [sp, #28]\n\t" - "LDM %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" - "UMULL r9, r10, %[r], %[r]\n\t" - "UMULL r11, r12, %[r], %[a]\n\t" + "LDM %[a], {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" + "UMULL r9, r10, r0, r0\n\t" + "UMULL r11, r12, r0, r1\n\t" "ADDS r11, r11, r11\n\t" "MOV lr, #0x0\n\t" "UMAAL r10, r11, lr, lr\n\t" "STM sp, {r9, r10}\n\t" "MOV r8, lr\n\t" - "UMAAL r8, r12, %[r], r2\n\t" + "UMAAL r8, r12, r0, r2\n\t" "ADCS r8, r8, r8\n\t" - "UMAAL r8, r11, %[a], %[a]\n\t" - "UMULL r9, r10, %[r], r3\n\t" - "UMAAL r9, r12, %[a], r2\n\t" + "UMAAL r8, r11, r1, r1\n\t" + "UMULL r9, r10, r0, r3\n\t" + "UMAAL r9, r12, r1, r2\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STRD r8, r9, [sp, #8]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r10, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r3\n\t" + "UMAAL r9, r10, r0, r4\n\t" + "UMAAL r9, r12, r1, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r2, r2\n\t" "STR r9, [sp, #16]\n\t" - "UMULL r9, r8, %[r], r5\n\t" - "UMAAL r9, r12, %[a], r4\n\t" + "UMULL r9, r8, r0, r5\n\t" + "UMAAL r9, r12, r1, r4\n\t" "UMAAL r9, r10, r2, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STR r9, [sp, #20]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r8, %[r], r6\n\t" - "UMAAL r9, r12, %[a], r5\n\t" + "UMAAL r9, r8, r0, r6\n\t" + "UMAAL r9, r12, r1, r5\n\t" "UMAAL r9, r10, r2, r4\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r3, r3\n\t" "STR r9, [sp, #24]\n\t" - "UMULL %[r], r9, %[r], r7\n\t" - "UMAAL %[r], r8, %[a], r6\n\t" - "UMAAL %[r], r12, r2, r5\n\t" - "UMAAL %[r], r10, r3, r4\n\t" - "ADCS %[r], %[r], %[r]\n\t" - "UMAAL %[r], r11, lr, lr\n\t" + "UMULL r0, r9, r0, r7\n\t" + "UMAAL r0, r8, r1, r6\n\t" + "UMAAL r0, r12, r2, r5\n\t" + "UMAAL r0, r10, r3, r4\n\t" + "ADCS r0, r0, r0\n\t" + "UMAAL r0, r11, lr, lr\n\t" /* R[7] = r0 */ - "UMAAL r9, r8, %[a], r7\n\t" + "UMAAL r9, r8, r1, r7\n\t" "UMAAL r9, r10, r2, r6\n\t" "UMAAL r12, r9, r3, r5\n\t" "ADCS r12, r12, r12\n\t" @@ -1790,10 +1873,10 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) "ADCS r3, r2, r2\n\t" "UMAAL r10, r3, r5, r5\n\t" /* R[10] = r10 */ - "MOV %[a], lr\n\t" - "UMAAL %[a], r8, 
r4, r7\n\t" - "UMAAL %[a], r9, r5, r6\n\t" - "ADCS r4, %[a], %[a]\n\t" + "MOV r1, lr\n\t" + "UMAAL r1, r8, r4, r7\n\t" + "UMAAL r1, r9, r5, r6\n\t" + "ADCS r4, r1, r1\n\t" "UMAAL r3, r4, lr, lr\n\t" /* R[11] = r3 */ "UMAAL r8, r9, r5, r7\n\t" @@ -1812,14 +1895,14 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) /* R[15] = r7 */ "LDR lr, [sp, #28]\n\t" "ADD lr, lr, #0x1c\n\t" - "STM lr!, {%[r], r12}\n\t" + "STM lr!, {r0, r12}\n\t" "STM lr!, {r11}\n\t" "STM lr!, {r10}\n\t" "STM lr!, {r3, r4, r8, r9}\n\t" "STM lr!, {r7}\n\t" "SUB lr, lr, #0x40\n\t" - "LDM sp, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" - "STM lr, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" + "LDM sp, {r0, r1, r2, r3, r4, r5, r6}\n\t" + "STM lr, {r0, r1, r2, r3, r4, r5, r6}\n\t" "ADD sp, sp, #0x20\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -1834,11 +1917,17 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -1905,11 +1994,17 @@ SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -1990,11 +2085,17 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
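 *
 * As with the adds, the SUBS/SBCS chains are word-by-word subtraction
 * with borrow; in illustrative C (hypothetical helper, not part of this
 * patch):
 *
 *     static sp_digit sub_words(sp_digit* r, const sp_digit* a,
 *                               const sp_digit* b, int n)
 *     {
 *         sp_digit bw = 0;                     // borrow: 0 or 1
 *         int i;
 *         for (i = 0; i < n; i++) {
 *             sp_digit d = a[i] - b[i] - bw;
 *             bw = (sp_digit)((d > a[i]) | ((d == a[i]) & (bw != 0)));
 *             r[i] = d;
 *         }
 *         return bw;                           // nonzero on borrow
 *     }
 *
 * (The asm variants return the borrow as 0/1 or as an all-ones word,
 * depending on the routine.)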
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -2105,18 +2206,24 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x100\n\t" "\n" "L_sp_2048_add_64_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -2147,10 +2254,16 @@ static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -2188,11 +2301,17 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. 
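 *
 * (A note on the looped WOLFSSL_SP_SMALL add variants above: the carry
 * cannot stay in the flags across the loop bookkeeping, so it is parked
 * in r3 as 0 or 1 and turned back into the carry flag at the top of
 * each iteration by
 *
 *     ADDS r3, r3, #0xffffffff
 *
 * since r3 + 0xffffffff carries out exactly when r3 was 1. The
 * immediate was previously written as the negative literal #0x-1, which
 * GCC tolerated but IAR rejects; this patch switches every such
 * occurrence to the equivalent positive spelling.)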
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x200\n\t" @@ -2263,10 +2382,16 @@ static void sp_2048_mul_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_sqr_64(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x200\n\t" @@ -2383,18 +2508,24 @@ static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x80\n\t" "\n" "L_sp_2048_add_32_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -2425,10 +2556,16 @@ static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -2466,11 +2603,17 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" @@ -2541,10 +2684,16 @@ static void sp_2048_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_sqr_32(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" @@ -2665,11 +2814,17 @@ static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -2712,11 +2867,17 @@ static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -3070,12 +3231,18 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. 
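 *
 * The mask m is expected to be all-zero or all-one bits, so the
 * subtrahend can be masked in without a branch and the routine runs in
 * constant time. A C sketch of the idea (illustrative only):
 *
 *     sp_digit bw = 0;                         // borrow
 *     int i;
 *     for (i = 0; i < 32; i++) {
 *         sp_digit t = b[i] & m;               // 0 or b[i]
 *         sp_digit d = a[i] - t - bw;
 *         bw = (sp_digit)((d > a[i]) | ((d == a[i]) & (bw != 0)));
 *         r[i] = d;
 *     }
 *     // the final borrow is handed back to the caller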
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -3114,12 +3281,18 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -3245,17 +3418,24 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp #endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_SP_SMALL /* Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -3550,11 +3730,125 @@ static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
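 *
 * Every variant below implements the textbook word-by-word Montgomery
 * reduction; in illustrative C (the unrolled assembly processes four
 * words of m per pass of the inner loop):
 *
 *     sp_digit mu, c;
 *     int i, j;
 *     for (i = 0; i < 32; i++) {
 *         mu = a[i] * mp;                      // truncating 32-bit mul
 *         c = 0;
 *         for (j = 0; j < 32; j++) {
 *             uint64_t t = (uint64_t)mu * m[j] + a[i + j] + c;
 *             a[i + j] = (sp_digit)t;          // low word; a[i] -> 0
 *             c = (sp_digit)(t >> 32);         // high word rides along
 *         }
 *         // fold c plus the running top carry into a[i + 32]
 *     }
 *
 * Afterwards the low 32 words are zero and the result sits in the upper
 * half, so the trailing sp_2048_cond_sub_32(a - 32, a, m, ...) call
 * (the asm has already advanced a by 32 words at that point) performs
 * the final conditional subtraction of m and writes the result back to
 * the start of a.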
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_32_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_32_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x80\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_32_mul_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_32_mul_%=\n\t" +#endif + "LDR r10, [%[a], #128]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #128]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x80\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_32_word_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_32_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#else +#ifndef WOLFSSL_SP_SMALL +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
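+ *
+ * The UMAAL forms on this side rely on that instruction's
+ * two-accumulator semantics:
+ *
+ *     UMAAL rLo, rHi, rA, rB   =>   {rHi:rLo} = rA*rB + rLo + rHi
+ *
+ * Since (2^32-1)^2 + 2*(2^32-1) = 2^64 - 1, the whole
+ * a[i+j] + mu*m[j] + carry step fits in one instruction with no carry
+ * ever lost; the new UMLAL-based small loop above has to spend an
+ * extra MOV/ADDS/ADC per word to achieve the same thing.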
+ */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -3752,6 +4046,101 @@ static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); } +#else +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_32_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_32_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x80\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_32_mul_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_32_mul_%=\n\t" +#endif + "LDR r10, [%[a], #128]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #128]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x80\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_32_word_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_32_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ #endif /* Multiply two Montgomery form 
numbers mod the modulus (prime). * (r = a * b mod m) @@ -3790,11 +4179,17 @@ SP_NOINLINE static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -3837,11 +4232,17 @@ static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -4021,11 +4422,17 @@ static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -4080,11 +4487,17 @@ static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
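 *
 * What the routine approximates, in plain C (these cores have no
 * 64-by-32-bit hardware divide):
 *
 *     static sp_digit div_word_ref(sp_digit d1, sp_digit d0, sp_digit div)
 *     {
 *         uint64_t d = ((uint64_t)d1 << 32) | d0;
 *         return (sp_digit)(d / div);          // exact quotient
 *     }
 *
 * The assembly instead builds the quotient from estimates against the
 * top bits of div, which can overshoot by one; the surrounding division
 * code tolerates that, as the comment above says.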
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -4144,16 +4557,22 @@ static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_2048_cmp_32(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x7c\n\t" "\n" @@ -4940,12 +5359,18 @@ static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -4984,12 +5409,18 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. 
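 *
 * (A note on sp_2048_cmp_32 above: the words are scanned from most
 * significant down using masks instead of early exits, so the running
 * time does not depend on where the operands first differ. In
 * illustrative C:
 *
 *     sp_int32 res = 0;
 *     sp_digit mask = (sp_digit)-1;            // all ones until decided
 *     int i;
 *     for (i = 31; i >= 0; i--) {
 *         sp_digit gt = (sp_digit)0 - (sp_digit)(a[i] > b[i]);
 *         sp_digit lt = (sp_digit)0 - (sp_digit)(a[i] < b[i]);
 *         res += (sp_int32)(mask & gt & 1);    // first difference is >
 *         res -= (sp_int32)(mask & lt & 1);    // first difference is <
 *         mask &= ~(gt | lt);                  // lock in the decision
 *     }
 *     return res;                              // -1, 0 or 1
 *
 * The same masking idea is behind the 0xffffffff constants loaded at
 * the top of that assembly, another spot where IAR needed the positive
 * spelling of -1.)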
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -5227,17 +5658,24 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp #endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_SP_SMALL /* Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -5788,11 +6226,125 @@ static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_64_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_64_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x100\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_64_mul_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_64_mul_%=\n\t" +#endif + "LDR r10, [%[a], #256]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #256]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x100\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_64_word_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_64_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#else +#ifndef WOLFSSL_SP_SMALL +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -6150,6 +6702,101 @@ static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); } +#else +/* Reduce the number back to 2048 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_64_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_2048_mont_reduce_64_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x100\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_64_mul_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_64_mul_%=\n\t" +#endif + "LDR r10, [%[a], #256]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #256]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x100\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_2048_mont_reduce_64_word_%=\n\t" +#else + "BLT.N L_sp_2048_mont_reduce_64_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ #endif /* Multiply two Montgomery 
form numbers mod the modulus (prime). * (r = a * b mod m) @@ -6188,11 +6835,17 @@ SP_NOINLINE static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -6229,11 +6882,17 @@ static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -6367,11 +7026,17 @@ static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digi * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -6426,11 +7091,17 @@ static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -6593,16 +7264,22 @@ static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_2048_cmp_64(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0xfc\n\t" "\n" @@ -7864,12 +8541,18 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -7877,7 +8560,7 @@ static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp "MOV r4, #0x0\n\t" "\n" "L_sp_2048_cond_add_32_words_%=:\n\t" - "ADDS r5, r5, #0x-1\n\t" + "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" "AND r7, r7, %[m]\n\t" @@ -7908,12 +8591,18 @@ static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. 
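 *
 * The additive counterpart of cond_sub: b is masked with m and added
 * with carry, so that, e.g., a modulus can be added back exactly when a
 * preceding subtraction borrowed, without branching on secret data.
 * Sketch (illustrative only):
 *
 *     sp_digit c = 0;                          // carry
 *     int i;
 *     for (i = 0; i < 32; i++) {
 *         sp_digit t = b[i] & m;               // 0 or b[i]
 *         sp_digit s = a[i] + t + c;
 *         c = (sp_digit)((s < a[i]) | ((s == a[i]) & (c != 0)));
 *         r[i] = s;
 *     }
 *     // c is the carry out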
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -8352,11 +9041,17 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_2048 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_2048_lshift_64(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_2048_lshift_64(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x1f\n\t" @@ -9166,11 +9861,17 @@ static void sp_3072_to_bin_96(sp_digit* r, byte* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x30\n\t" @@ -10187,11 +10888,17 @@ static void sp_3072_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * a A single precision integer. * b A single precision integer. 
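 *
 * On sp_3072_mul_12 above: it is, broadly, a column-oriented
 * (product-scanning) schoolbook multiply, accumulating every a[i]*b[j]
 * with i + j == k into output column k. An illustrative C model, not
 * the exact register schedule:
 *
 *     sp_digit t[24];
 *     uint64_t acc = 0;                        // low 64 column bits
 *     sp_digit hi = 0;                         // third accumulator word
 *     int i, k;
 *     for (k = 0; k < 23; k++) {
 *         int last = (k < 12) ? k : 11;
 *         for (i = (k < 12) ? 0 : (k - 11); i <= last; i++) {
 *             uint64_t p = (uint64_t)a[i] * b[k - i];
 *             acc += p;
 *             hi += (sp_digit)(acc < p);       // carry out of 64 bits
 *         }
 *         t[k] = (sp_digit)acc;                // emit the column
 *         acc = (acc >> 32) | ((uint64_t)hi << 32);
 *         hi = 0;
 *     }
 *     t[23] = (sp_digit)acc;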
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -10229,10 +10936,16 @@ static sp_digit sp_3072_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_in_place_24(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -10291,11 +11004,17 @@ static sp_digit sp_3072_sub_in_place_24(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_24(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -10423,10 +11142,16 @@ SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -10527,11 +11252,17 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. 
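 *
 * These fixed-size add and sub_in_place helpers are the glue for the
 * Karatsuba-style doubling that appears to build the larger multiplies
 * here (mul_24 from mul_12, mul_48 from mul_24, and so on).
 * Schematically, splitting a = a1*2^(32n) + a0 and b likewise:
 *
 *     z0 = a0 * b0
 *     z2 = a1 * b1
 *     z1 = (a0 + a1) * (b0 + b1) - z0 - z2
 *     r  = z2*2^(64n) + z1*2^(32n) + z0
 *
 * with the (a0 + a1) sums coming from the add_n routines and the
 * -z0 - z2 corrections from the sub_in_place_n routines (carry
 * handling omitted from this sketch).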
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -10701,10 +11432,16 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -10889,11 +11626,17 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -11147,10 +11890,16 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_sqr_12(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x30\n\t" @@ -11842,11 +12591,17 @@ static void sp_3072_sqr_12(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. 
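 *
 * On sp_3072_sqr_12 above: a square needs each cross product only once,
 * since a[i]*a[j] = a[j]*a[i], so output column k is built as
 *
 *     2 * (sum of a[i]*a[j] over i < j, i + j == k)
 *       + (a[k/2]*a[k/2] when k is even)
 *
 * which is what the doubling ADCS chains and the separately accumulated
 * diagonal terms implement, roughly halving the multiplication count
 * relative to sp_3072_mul_12.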
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -11920,11 +12675,17 @@ SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_24(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_24(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -12019,11 +12780,17 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -12162,18 +12929,24 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x180\n\t" "\n" "L_sp_3072_add_96_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -12204,10 +12977,16 @@ static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -12245,11 +13024,17 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x300\n\t" @@ -12320,10 +13105,16 @@ static void sp_3072_mul_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_sqr_96(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x300\n\t" @@ -12440,18 +13231,24 @@ static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0xc0\n\t" "\n" "L_sp_3072_add_48_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -12482,10 +13279,16 @@ static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -12523,11 +13326,17 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x180\n\t" @@ -12598,10 +13407,16 @@ static void sp_3072_mul_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_sqr_48(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x180\n\t" @@ -12722,11 +13537,17 @@ static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho) * a A single precision integer. * b A single precision digit. 
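+ *
+ * A rough C model of multiplying a 96-word value by one digit (sketch
+ * only, not the generated code; assumes 32-bit sp_digit):
+ *
+ *     uint64_t t = 0;
+ *     for (i = 0; i < 96; i++) {
+ *         t += (uint64_t)a[i] * b;
+ *         r[i] = (sp_digit)t;
+ *         t >>= 32;
+ *     }
+ *     r[96] = (sp_digit)t;   the result is 97 words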
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -12769,11 +13590,17 @@ static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -13287,12 +14114,18 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -13331,12 +14164,18 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. 
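+ *
+ * m is expected to be either all zero bits or all one bits, so b is
+ * subtracted in full or not at all with no data-dependent branching.
+ * A rough C model (sketch only, not the generated code; assumes 32-bit
+ * sp_digit):
+ *
+ *     uint64_t t = 0;
+ *     for (i = 0; i < 48; i++) {
+ *         t = (uint64_t)a[i] - (b[i] & m) - ((t >> 32) & 1);
+ *         r[i] = (sp_digit)t;
+ *     }
+ *     return (sp_digit)(t >> 32);   final borrow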
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -13518,17 +14357,24 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp #endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_SP_SMALL /* Reduce the number back to 3072 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -13951,11 +14797,125 @@ static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
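+ *
+ * Word-level shape of the reduction below (rough sketch only, not the
+ * generated code; assumes 32-bit sp_digit and uint64_t intermediates):
+ *
+ *     for (i = 0; i < 48; i++) {
+ *         sp_digit mu = a[i] * mp;   truncated to 32 bits
+ *         sp_digit c = 0;
+ *         for (j = 0; j < 48; j++) {
+ *             uint64_t t = (uint64_t)mu * m[j] + a[i + j] + c;
+ *             a[i + j] = (sp_digit)t;
+ *             c = (sp_digit)(t >> 32);
+ *         }
+ *         c and the running carry are then folded into a[i + 48];
+ *     }
+ *
+ * The trailing sp_3072_cond_sub_48 call turns the final carry (0 or 1)
+ * into a 0/all-ones mask via (sp_digit)0 - mp and subtracts m at most
+ * once.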
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_48_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_48_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0xc0\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_48_mul_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_48_mul_%=\n\t" +#endif + "LDR r10, [%[a], #192]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #192]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0xc0\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_48_word_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_48_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#else +#ifndef WOLFSSL_SP_SMALL +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
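+ *
+ * This path relies on the UMAAL instruction, which computes a
+ * 32x32->64 multiply plus two 32-bit additions in one step and cannot
+ * overflow 64 bits:
+ *
+ *     {RdHi:RdLo} = Rn * Rm + RdLo + RdHi
+ *
+ * so each inner-loop word needs a single UMAAL where the
+ * WOLFSSL_SP_NO_UMAAL build above uses a UMLAL/ADDS/ADC sequence.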
+ */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -14233,6 +15193,101 @@ static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp); } +#else +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_48_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_48_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0xc0\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_48_mul_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_48_mul_%=\n\t" +#endif + "LDR r10, [%[a], #192]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #192]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0xc0\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_48_word_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_48_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ #endif /* Multiply two Montgomery 
form numbers mod the modulus (prime). * (r = a * b mod m) @@ -14271,11 +15326,17 @@ SP_NOINLINE static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -14318,11 +15379,17 @@ static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -14582,11 +15649,17 @@ static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -14641,11 +15714,17 @@ static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
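+ *
+ * In C terms the intended contract is roughly (sketch only; assumes
+ * the usual precondition d1 < div so the quotient fits in 32 bits):
+ *
+ *     uint64_t d = ((uint64_t)d1 << 32) | d0;
+ *     return (sp_digit)(d / div);   may be one too large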
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -14705,16 +15784,22 @@ static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_3072_cmp_48(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0xbc\n\t" "\n" @@ -15677,12 +16762,18 @@ static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -15721,12 +16812,18 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -16076,17 +17173,24 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp #endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_SP_SMALL /* Reduce the number back to 3072 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -16893,11 +17997,125 @@ static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_96_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_96_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x180\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_96_mul_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_96_mul_%=\n\t" +#endif + "LDR r10, [%[a], #384]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #384]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x180\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_96_word_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_96_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#else +#ifndef WOLFSSL_SP_SMALL +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -17415,6 +18633,101 @@ static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp); } +#else +/* Reduce the number back to 3072 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_96_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_3072_mont_reduce_96_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x180\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_96_mul_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_96_mul_%=\n\t" +#endif + "LDR r10, [%[a], #384]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #384]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x180\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_3072_mont_reduce_96_word_%=\n\t" +#else + "BLT.N L_sp_3072_mont_reduce_96_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ #endif /* Multiply two Montgomery 
form numbers mod the modulus (prime). * (r = a * b mod m) @@ -17453,11 +18766,17 @@ SP_NOINLINE static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -17494,11 +18813,17 @@ static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -17688,11 +19013,17 @@ static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digi * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -17747,11 +19078,17 @@ static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -17914,16 +19251,22 @@ static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_3072_cmp_96(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x17c\n\t" "\n" @@ -19537,12 +20880,18 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -19550,7 +20899,7 @@ static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp "MOV r4, #0x0\n\t" "\n" "L_sp_3072_cond_add_48_words_%=:\n\t" - "ADDS r5, r5, #0x-1\n\t" + "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" "AND r7, r7, %[m]\n\t" @@ -19581,12 +20930,18 @@ static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. 
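+ *
+ * The mirror of the conditional subtract: the mask m selects between
+ * adding b and adding zero without branching. A rough C model (sketch
+ * only, not the generated code; assumes 32-bit sp_digit):
+ *
+ *     uint64_t t = 0;
+ *     for (i = 0; i < 48; i++) {
+ *         t = (uint64_t)a[i] + (b[i] & m) + (t >> 32);
+ *         r[i] = (sp_digit)t;
+ *     }
+ *     return (sp_digit)(t >> 32);   carry out: 0 or 1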
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -20081,11 +21436,17 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_3072 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_3072_lshift_96(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_3072_lshift_96(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x1f\n\t" @@ -21086,10 +22447,16 @@ static void sp_4096_to_bin_128(sp_digit* r, byte* a) * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -21330,11 +22697,17 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -21653,18 +23026,24 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) * a A single precision integer. 
* b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x200\n\t" "\n" "L_sp_4096_add_128_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -21695,10 +23074,16 @@ static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_dig * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -21736,11 +23121,17 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_mul_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x400\n\t" @@ -21811,10 +23202,16 @@ static void sp_4096_mul_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* * r A single precision integer. * a A single precision integer. 
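+ *
+ * One standard way to square a 128-word value with three half-size
+ * squares (Karatsuba-style identity; math sketch only, the exact
+ * splitting performed by the code below is not spelled out here):
+ *
+ *     a = a1*B + a0, with B = 2^2048
+ *     a^2 = a1^2*B^2 + (a0^2 + a1^2 - (a0 - a1)^2)*B + a0^2
+ *
+ * which replaces the doubled cross product 2*a0*a1 with squarings and
+ * a subtraction.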
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_sqr_128(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x400\n\t" @@ -21933,11 +23330,17 @@ static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -21980,11 +23383,17 @@ static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -22659,12 +24068,18 @@ static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -22703,12 +24118,18 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const s * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -23170,17 +24591,24 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const s #endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_NO_UMAAL +#ifndef WOLFSSL_SP_SMALL /* Reduce the number back to 4096 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -24243,11 +25671,125 @@ static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_4096_mont_reduce_128_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_4096_mont_reduce_128_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "MOV r5, #0x0\n\t" + "UMLAL r10, r5, r8, r7\n\t" + "ADDS r10, r10, r4\n\t" + "STR r10, [%[a], r12]\n\t" + "ADC r4, r5, #0x0\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x200\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_4096_mont_reduce_128_mul_%=\n\t" +#else + "BLT.N L_sp_4096_mont_reduce_128_mul_%=\n\t" +#endif + "LDR r10, [%[a], #512]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #512]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x200\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_4096_mont_reduce_128_word_%=\n\t" +#else + "BLT.N L_sp_4096_mont_reduce_128_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#else +#ifndef WOLFSSL_SP_SMALL +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -24925,6 +26467,101 @@ static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp); } +#else +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +static void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "LDR r11, [%[m]]\n\t" + /* i = 0 */ + "MOV r9, #0x0\n\t" + /* ca = 0 */ + "MOV r3, #0x0\n\t" + "\n" + "L_sp_4096_mont_reduce_128_word_%=:\n\t" + /* mu = a[i] * mp */ + "LDR r10, [%[a]]\n\t" + "MUL r8, %[mp], r10\n\t" + /* j = 0 */ + "MOV r12, #0x0\n\t" + "MOV r4, #0x0\n\t" + "\n" + "L_sp_4096_mont_reduce_128_mul_%=:\n\t" + /* a[i+j+0] += m[j+0] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+2] += m[j+2] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + /* a[i+j+3] += m[j+3] * mu */ + "LDR r7, [%[m], r12]\n\t" + "LDR r10, [%[a], r12]\n\t" + "UMAAL r10, r4, r8, r7\n\t" + "STR r10, [%[a], r12]\n\t" + /* j += 1 */ + "ADD r12, r12, #0x4\n\t" + "CMP r12, #0x200\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_4096_mont_reduce_128_mul_%=\n\t" +#else + "BLT.N L_sp_4096_mont_reduce_128_mul_%=\n\t" +#endif + "LDR r10, [%[a], #512]\n\t" + "ADDS r4, r4, r3\n\t" + "MOV r3, #0x0\n\t" + "ADC r3, r3, #0x0\n\t" + "ADDS r10, r10, r4\n\t" + "ADC r3, r3, r3\n\t" + "STR r10, [%[a], #512]\n\t" + /* i += 1 */ + "ADD r9, r9, #0x4\n\t" + "ADD %[a], %[a], #0x4\n\t" + "CMP r9, #0x200\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BLT L_sp_4096_mont_reduce_128_word_%=\n\t" +#else + "BLT.N L_sp_4096_mont_reduce_128_word_%=\n\t" +#endif + /* Loop Done */ + "MOV %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp); +} + +#endif /* !WOLFSSL_SP_SMALL */ #endif /* Multiply 
two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) @@ -24963,11 +26600,17 @@ SP_NOINLINE static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -25004,11 +26647,17 @@ static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_dig * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -25254,11 +26903,17 @@ static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_dig * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -25313,11 +26968,17 @@ static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
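/* A sketch of the contract stated above: the word division may return the
 * true quotient or one more, and the caller fixes it up with one multiply
 * and compare. Helper name is illustrative; requires d1 < div so the
 * quotient fits in 32 bits. */
#include <stdint.h>

static uint32_t div_word_fixup(uint32_t d1, uint32_t d0, uint32_t div,
                               uint32_t q)      /* q from the approx div */
{
    uint64_t d = ((uint64_t)d1 << 32) | d0;
    if ((uint64_t)q * div > d)                  /* q was one too large */
        q--;
    return q;
}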
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -25480,16 +27141,22 @@ static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_4096_cmp_128(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x1fc\n\t" "\n" @@ -27455,12 +29122,18 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -27468,7 +29141,7 @@ static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp "MOV r4, #0x0\n\t" "\n" "L_sp_4096_cond_add_64_words_%=:\n\t" - "ADDS r5, r5, #0x-1\n\t" + "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" "AND r7, r7, %[m]\n\t" @@ -27499,12 +29172,18 @@ static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. 
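/* A sketch of the constant-time compare the cmp routines above implement:
 * every limb is visited from the top, a mask freezes the result at the
 * first difference, and the all-ones constant (now written #0xffffffff,
 * since IAR rejects the #0x-1 spelling) supplies the -1. Portable
 * illustration only; the asm avoids C comparison operators entirely. */
#include <stdint.h>

static int32_t cmp_ct(const uint32_t* a, const uint32_t* b, int n)
{
    int32_t r = 0;
    uint32_t mask = (uint32_t)-1;      /* all-ones until limbs differ */
    int i;
    for (i = n - 1; i >= 0; i--) {
        uint32_t gt = (uint32_t)0 - (uint32_t)(a[i] > b[i]);
        uint32_t lt = (uint32_t)0 - (uint32_t)(a[i] < b[i]);
        r += (int32_t)(mask & gt & 1);
        r -= (int32_t)(mask & lt & 1);
        mask &= ~(gt | lt);            /* freeze after the first difference */
    }
    return r;                          /* -1, 0 or +1 */
}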
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -28055,11 +29734,17 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_4096 +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_4096_lshift_128(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_4096_lshift_128(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x1f\n\t" @@ -29126,11 +30811,17 @@ static const sp_digit p256_b[8] = { * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x40\n\t" @@ -29198,17 +30889,26 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p #else #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. 
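/* The pattern used throughout this file, in miniature: GCC/Clang pin the
 * renamed parameters to the registers the asm expects, while IAR, which
 * neither parses register-variable bindings nor honours 'register', keeps
 * the original names and lets the operand constraints pick registers. The
 * pragma stops IAR inlining these functions, presumably because inlining
 * would invalidate the fixed-register setup. Hypothetical function, for
 * illustration only. */
#include <stdint.h>

#ifdef __IAR_SYSTEMS_ICC__
#pragma inline=never
#endif /* __IAR_SYSTEMS_ICC__ */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
static void store_zero(uint32_t* p_p)
#else
static void store_zero(uint32_t* p)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register uint32_t* p __asm__ ("r0") = (uint32_t*)p_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
    __asm__ __volatile__ (
        "MOV r2, #0x0\n\t"
        "STR r2, [%[p]]\n\t"        /* *p = 0 */
        : [p] "+r" (p)
        :
        : "memory", "r2"
    );
}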
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x24\n\t" @@ -29551,115 +31251,129 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "STRD %[r], %[a], [sp, #36]\n\t" +#else + "STR %[r], [sp, #36]\n\t" + "STR %[a], [sp, #40]\n\t" +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[b]\n\t" - "LDM %[a], {%[r], %[a], %[b], r3}\n\t" + "LDM %[a], {r0, r1, r2, r3}\n\t" "LDM lr!, {r4, r5, r6}\n\t" - "UMULL r10, r11, %[r], r4\n\t" - "UMULL r12, r7, %[a], r4\n\t" - "UMAAL r11, r12, %[r], r5\n\t" - "UMULL r8, r9, %[b], r4\n\t" - "UMAAL r12, r8, %[a], r5\n\t" - "UMAAL r12, r7, %[r], r6\n\t" + "UMULL r10, r11, r0, r4\n\t" + "UMULL r12, r7, r1, r4\n\t" + "UMAAL r11, r12, r0, r5\n\t" + "UMULL r8, r9, r2, r4\n\t" + "UMAAL r12, r8, r1, r5\n\t" + "UMAAL r12, r7, r0, r6\n\t" "UMAAL r8, r9, r3, r4\n\t" "STM sp, {r10, r11, r12}\n\t" - "UMAAL r7, r8, %[b], r5\n\t" + "UMAAL r7, r8, r2, r5\n\t" "LDM lr!, {r4}\n\t" - "UMULL r10, r11, %[a], r6\n\t" - "UMAAL r8, r9, %[b], r6\n\t" - "UMAAL r7, r10, %[r], r4\n\t" + "UMULL r10, r11, r1, r6\n\t" + "UMAAL r8, r9, r2, r6\n\t" + "UMAAL r7, r10, r0, r4\n\t" "UMAAL r8, r11, r3, r5\n\t" "STR r7, [sp, #12]\n\t" - "UMAAL r8, r10, %[a], r4\n\t" + "UMAAL r8, r10, r1, r4\n\t" "UMAAL r9, r11, r3, r6\n\t" - "UMAAL r9, r10, %[b], r4\n\t" + "UMAAL r9, r10, r2, r4\n\t" "UMAAL r10, r11, r3, r4\n\t" "LDM lr, {r4, r5, r6, r7}\n\t" "MOV r12, #0x0\n\t" - "UMLAL r8, r12, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r4\n\t" - "UMAAL r10, r12, %[b], r4\n\t" + "UMLAL r8, r12, r0, r4\n\t" + "UMAAL r9, r12, r1, r4\n\t" + "UMAAL r10, r12, r2, r4\n\t" "UMAAL r11, r12, r3, r4\n\t" "MOV r4, #0x0\n\t" - "UMLAL r9, r4, %[r], r5\n\t" - "UMAAL r10, r4, %[a], r5\n\t" - "UMAAL r11, r4, %[b], r5\n\t" + "UMLAL r9, r4, r0, r5\n\t" + "UMAAL r10, r4, r1, r5\n\t" + "UMAAL r11, r4, r2, r5\n\t" "UMAAL r12, r4, r3, r5\n\t" "MOV r5, 
#0x0\n\t" - "UMLAL r10, r5, %[r], r6\n\t" - "UMAAL r11, r5, %[a], r6\n\t" - "UMAAL r12, r5, %[b], r6\n\t" + "UMLAL r10, r5, r0, r6\n\t" + "UMAAL r11, r5, r1, r6\n\t" + "UMAAL r12, r5, r2, r6\n\t" "UMAAL r4, r5, r3, r6\n\t" "MOV r6, #0x0\n\t" - "UMLAL r11, r6, %[r], r7\n\t" - "LDR %[r], [sp, #40]\n\t" - "UMAAL r12, r6, %[a], r7\n\t" - "ADD %[r], %[r], #0x10\n\t" - "UMAAL r4, r6, %[b], r7\n\t" + "UMLAL r11, r6, r0, r7\n\t" + "LDR r0, [sp, #40]\n\t" + "UMAAL r12, r6, r1, r7\n\t" + "ADD r0, r0, #0x10\n\t" + "UMAAL r4, r6, r2, r7\n\t" "SUB lr, lr, #0x10\n\t" "UMAAL r5, r6, r3, r7\n\t" - "LDM %[r], {%[r], %[a], %[b], r3}\n\t" + "LDM r0, {r0, r1, r2, r3}\n\t" "STR r6, [sp, #32]\n\t" "LDM lr!, {r6}\n\t" "MOV r7, #0x0\n\t" - "UMLAL r8, r7, %[r], r6\n\t" - "UMAAL r9, r7, %[a], r6\n\t" + "UMLAL r8, r7, r0, r6\n\t" + "UMAAL r9, r7, r1, r6\n\t" "STR r8, [sp, #16]\n\t" - "UMAAL r10, r7, %[b], r6\n\t" + "UMAAL r10, r7, r2, r6\n\t" "UMAAL r11, r7, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r8, #0x0\n\t" - "UMLAL r9, r8, %[r], r6\n\t" - "UMAAL r10, r8, %[a], r6\n\t" + "UMLAL r9, r8, r0, r6\n\t" + "UMAAL r10, r8, r1, r6\n\t" "STR r9, [sp, #20]\n\t" - "UMAAL r11, r8, %[b], r6\n\t" + "UMAAL r11, r8, r2, r6\n\t" "UMAAL r12, r8, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r9, #0x0\n\t" - "UMLAL r10, r9, %[r], r6\n\t" - "UMAAL r11, r9, %[a], r6\n\t" + "UMLAL r10, r9, r0, r6\n\t" + "UMAAL r11, r9, r1, r6\n\t" "STR r10, [sp, #24]\n\t" - "UMAAL r12, r9, %[b], r6\n\t" + "UMAAL r12, r9, r2, r6\n\t" "UMAAL r4, r9, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r10, #0x0\n\t" - "UMLAL r11, r10, %[r], r6\n\t" - "UMAAL r12, r10, %[a], r6\n\t" + "UMLAL r11, r10, r0, r6\n\t" + "UMAAL r12, r10, r1, r6\n\t" "STR r11, [sp, #28]\n\t" - "UMAAL r4, r10, %[b], r6\n\t" + "UMAAL r4, r10, r2, r6\n\t" "UMAAL r5, r10, r3, r6\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r12, r7, %[r], r11\n\t" - "UMAAL r4, r7, %[a], r11\n\t" + "UMAAL r12, r7, r0, r11\n\t" + "UMAAL r4, r7, r1, r11\n\t" "LDR r6, [sp, #32]\n\t" - "UMAAL r5, r7, %[b], r11\n\t" + "UMAAL r5, r7, r2, r11\n\t" "UMAAL r6, r7, r3, r11\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r4, r8, %[r], r11\n\t" - "UMAAL r5, r8, %[a], r11\n\t" - "UMAAL r6, r8, %[b], r11\n\t" + "UMAAL r4, r8, r0, r11\n\t" + "UMAAL r5, r8, r1, r11\n\t" + "UMAAL r6, r8, r2, r11\n\t" "UMAAL r7, r8, r3, r11\n\t" "LDM lr, {r11, lr}\n\t" - "UMAAL r5, r9, %[r], r11\n\t" - "UMAAL r6, r10, %[r], lr\n\t" - "UMAAL r6, r9, %[a], r11\n\t" - "UMAAL r7, r10, %[a], lr\n\t" - "UMAAL r7, r9, %[b], r11\n\t" - "UMAAL r8, r10, %[b], lr\n\t" + "UMAAL r5, r9, r0, r11\n\t" + "UMAAL r6, r10, r0, lr\n\t" + "UMAAL r6, r9, r1, r11\n\t" + "UMAAL r7, r10, r1, lr\n\t" + "UMAAL r7, r9, r2, r11\n\t" + "UMAAL r8, r10, r2, lr\n\t" "UMAAL r8, r9, r3, r11\n\t" "UMAAL r9, r10, r3, lr\n\t" "MOV r3, r12\n\t" @@ -29684,10 +31398,16 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p * r A single precision integer. * a A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x40\n\t" @@ -29780,15 +31500,24 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) #else #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -30023,62 +31752,71 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* __IAR_SYSTEMS_ICC__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x20\n\t" "STR %[r], [sp, #28]\n\t" - "LDM %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" - "UMULL r9, r10, %[r], %[r]\n\t" - "UMULL r11, r12, %[r], %[a]\n\t" + "LDM %[a], {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" + "UMULL r9, r10, r0, r0\n\t" + "UMULL r11, r12, r0, r1\n\t" "ADDS r11, r11, r11\n\t" "MOV lr, #0x0\n\t" "UMAAL r10, r11, lr, lr\n\t" "STM sp, {r9, r10}\n\t" "MOV r8, lr\n\t" - "UMAAL r8, r12, %[r], r2\n\t" + "UMAAL r8, r12, r0, r2\n\t" "ADCS r8, r8, r8\n\t" - "UMAAL r8, r11, %[a], %[a]\n\t" - "UMULL r9, r10, %[r], r3\n\t" - "UMAAL r9, r12, %[a], r2\n\t" + "UMAAL r8, r11, r1, r1\n\t" + "UMULL r9, r10, r0, r3\n\t" + "UMAAL r9, r12, r1, r2\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STRD r8, r9, [sp, #8]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r10, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r3\n\t" + "UMAAL r9, r10, r0, r4\n\t" + "UMAAL r9, r12, r1, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r2, r2\n\t" "STR r9, [sp, #16]\n\t" - "UMULL r9, r8, %[r], r5\n\t" - "UMAAL r9, r12, %[a], r4\n\t" + "UMULL r9, r8, r0, r5\n\t" + "UMAAL r9, r12, r1, r4\n\t" "UMAAL r9, r10, r2, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STR r9, [sp, #20]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r8, %[r], r6\n\t" - "UMAAL r9, r12, %[a], 
r5\n\t" + "UMAAL r9, r8, r0, r6\n\t" + "UMAAL r9, r12, r1, r5\n\t" "UMAAL r9, r10, r2, r4\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r3, r3\n\t" "STR r9, [sp, #24]\n\t" - "UMULL %[r], r9, %[r], r7\n\t" - "UMAAL %[r], r8, %[a], r6\n\t" - "UMAAL %[r], r12, r2, r5\n\t" - "UMAAL %[r], r10, r3, r4\n\t" - "ADCS %[r], %[r], %[r]\n\t" - "UMAAL %[r], r11, lr, lr\n\t" + "UMULL r0, r9, r0, r7\n\t" + "UMAAL r0, r8, r1, r6\n\t" + "UMAAL r0, r12, r2, r5\n\t" + "UMAAL r0, r10, r3, r4\n\t" + "ADCS r0, r0, r0\n\t" + "UMAAL r0, r11, lr, lr\n\t" /* R[7] = r0 */ - "UMAAL r9, r8, %[a], r7\n\t" + "UMAAL r9, r8, r1, r7\n\t" "UMAAL r9, r10, r2, r6\n\t" "UMAAL r12, r9, r3, r5\n\t" "ADCS r12, r12, r12\n\t" @@ -30096,10 +31834,10 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) "ADCS r3, r2, r2\n\t" "UMAAL r10, r3, r5, r5\n\t" /* R[10] = r10 */ - "MOV %[a], lr\n\t" - "UMAAL %[a], r8, r4, r7\n\t" - "UMAAL %[a], r9, r5, r6\n\t" - "ADCS r4, %[a], %[a]\n\t" + "MOV r1, lr\n\t" + "UMAAL r1, r8, r4, r7\n\t" + "UMAAL r1, r9, r5, r6\n\t" + "ADCS r4, r1, r1\n\t" "UMAAL r3, r4, lr, lr\n\t" /* R[11] = r3 */ "UMAAL r8, r9, r5, r7\n\t" @@ -30118,14 +31856,14 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) /* R[15] = r7 */ "LDR lr, [sp, #28]\n\t" "ADD lr, lr, #0x1c\n\t" - "STM lr!, {%[r], r12}\n\t" + "STM lr!, {r0, r12}\n\t" "STM lr!, {r11}\n\t" "STM lr!, {r10}\n\t" "STM lr!, {r3, r4, r8, r9}\n\t" "STM lr!, {r7}\n\t" "SUB lr, lr, #0x40\n\t" - "LDM sp, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" - "STM lr, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" + "LDM sp, {r0, r1, r2, r3, r4, r5, r6}\n\t" + "STM lr, {r0, r1, r2, r3, r4, r5, r6}\n\t" "ADD sp, sp, #0x20\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -30142,18 +31880,24 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x20\n\t" "\n" "L_sp_256_add_8_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -30184,11 +31928,17 @@ static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -30221,10 +31971,16 @@ static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* * a The number to convert. * m The modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x18\n\t" @@ -30232,7 +31988,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di /* Clear overflow and underflow */ "MOV r11, #0x0\n\t" "MOV r12, #0x0\n\t" - "# t[0] = 1 1 0 -1 -1 -1 -1 0\n\t" + /* t[0] = 1 1 0 -1 -1 -1 -1 0 */ "ADDS r10, r2, r3\n\t" "ADC r11, r11, #0x0\n\t" "SUBS r10, r10, r5\n\t" @@ -30247,7 +32003,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r10, [sp]\n\t" "neg r12, r12\n\t" "MOV r10, #0x0\n\t" - "# t[1] = 0 1 1 0 -1 -1 -1 -1\n\t" + /* t[1] = 0 1 1 0 -1 -1 -1 -1 */ "ADDS r11, r11, r3\n\t" "ADC r10, r10, #0x0\n\t" "ADDS r11, r11, r4\n\t" @@ -30266,7 +32022,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r11, [sp, #4]\n\t" "neg r12, r12\n\t" "MOV r11, #0x0\n\t" - "# t[2] = 0 0 1 1 0 -1 -1 -1\n\t" + /* t[2] = 0 0 1 1 0 -1 -1 -1 */ "ADDS r10, r10, r4\n\t" "ADC r11, r11, #0x0\n\t" "ADDS r10, r10, r5\n\t" @@ -30283,7 +32039,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r10, [sp, #8]\n\t" "neg r12, r12\n\t" "MOV r10, #0x0\n\t" - "# t[3] = -1 -1 0 2 2 1 0 -1\n\t" + /* t[3] = -1 -1 0 2 2 1 0 -1 */ "ADDS r11, r11, r5\n\t" "ADC r10, r10, #0x0\n\t" "ADDS r11, r11, r5\n\t" @@ -30306,7 +32062,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r11, [sp, #12]\n\t" "neg r12, r12\n\t" "MOV r11, #0x0\n\t" - "# t[4] = 0 -1 -1 0 2 2 1 0\n\t" + /* t[4] = 0 -1 -1 0 2 2 1 0 */ "ADDS r10, r10, r6\n\t" "ADC r11, r11, #0x0\n\t" "ADDS r10, r10, r6\n\t" @@ -30327,7 +32083,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r10, [sp, #16]\n\t" "neg r12, r12\n\t" "MOV r10, #0x0\n\t" - "# t[5] = 0 0 -1 -1 0 2 2 1\n\t" + /* t[5] = 0 0 -1 -1 0 2 2 1 */ "ADDS r11, r11, r7\n\t" "ADC r10, r10, #0x0\n\t" "ADDS r11, r11, r7\n\t" @@ -30348,7 +32104,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "STR r11, [sp, #20]\n\t" "neg 
r12, r12\n\t" "MOV r11, #0x0\n\t" - "# t[6] = -1 -1 0 0 0 1 3 2\n\t" + /* t[6] = -1 -1 0 0 0 1 3 2 */ "ADDS r10, r10, r7\n\t" "ADC r11, r11, #0x0\n\t" "ADDS r10, r10, r8\n\t" @@ -30371,7 +32127,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "MOV r8, r10\n\t" "neg r12, r12\n\t" "MOV r10, #0x0\n\t" - "# t[7] = 1 0 -1 -1 -1 -1 0 3\n\t" + /* t[7] = 1 0 -1 -1 -1 -1 0 3 */ "ADDS r11, r11, r2\n\t" "ADC r10, r10, #0x0\n\t" "ADDS r11, r11, r9\n\t" @@ -30447,7 +32203,11 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ return (uint32_t)(size_t)r; } @@ -30647,6 +32407,9 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) } #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -30656,11 +32419,17 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) * m Modulus (prime). * mp Montgomery multiplier. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -31119,11 +32888,22 @@ static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* Multiply two Montgomery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -31133,109 +32913,120 @@ static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * m Modulus (prime). * mp Montgomery multiplier. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x4c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG "STRD %[r], %[a], [sp, #68]\n\t" +#else + "STR %[r], [sp, #68]\n\t" + "STR %[a], [sp, #72]\n\t" +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[b]\n\t" - "LDM %[a], {%[r], %[a], %[b], r3}\n\t" + "LDM %[a], {r0, r1, r2, r3}\n\t" "LDM lr!, {r4, r5, r6}\n\t" - "UMULL r10, r11, %[r], r4\n\t" - "UMULL r12, r7, %[a], r4\n\t" - "UMAAL r11, r12, %[r], r5\n\t" - "UMULL r8, r9, %[b], r4\n\t" - "UMAAL r12, r8, %[a], r5\n\t" - "UMAAL r12, r7, %[r], r6\n\t" + "UMULL r10, r11, r0, r4\n\t" + "UMULL r12, r7, r1, r4\n\t" + "UMAAL r11, r12, r0, r5\n\t" + "UMULL r8, r9, r2, r4\n\t" + "UMAAL r12, r8, r1, r5\n\t" + "UMAAL r12, r7, r0, r6\n\t" "UMAAL r8, r9, r3, r4\n\t" "STM sp, {r10, r11, r12}\n\t" - "UMAAL r7, r8, %[b], r5\n\t" + "UMAAL r7, r8, r2, r5\n\t" "LDM lr!, {r4}\n\t" - "UMULL r10, r11, %[a], r6\n\t" - "UMAAL r8, r9, %[b], r6\n\t" - "UMAAL r7, r10, %[r], r4\n\t" + "UMULL r10, r11, r1, r6\n\t" + "UMAAL r8, r9, r2, r6\n\t" + "UMAAL r7, r10, r0, r4\n\t" "UMAAL r8, r11, r3, r5\n\t" "STR r7, [sp, #12]\n\t" - "UMAAL r8, r10, %[a], r4\n\t" + "UMAAL r8, r10, r1, r4\n\t" "UMAAL r9, r11, r3, r6\n\t" - "UMAAL r9, r10, %[b], r4\n\t" + "UMAAL r9, r10, r2, r4\n\t" "UMAAL r10, r11, r3, r4\n\t" "LDM lr, {r4, r5, r6, r7}\n\t" "MOV r12, #0x0\n\t" - "UMLAL r8, r12, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r4\n\t" - "UMAAL r10, r12, %[b], r4\n\t" + "UMLAL r8, r12, r0, r4\n\t" + "UMAAL r9, r12, r1, r4\n\t" + "UMAAL r10, r12, r2, r4\n\t" "UMAAL r11, r12, r3, r4\n\t" "MOV r4, #0x0\n\t" - "UMLAL r9, r4, %[r], r5\n\t" - "UMAAL r10, r4, %[a], r5\n\t" - "UMAAL r11, r4, %[b], r5\n\t" + "UMLAL r9, r4, r0, r5\n\t" + "UMAAL r10, r4, r1, r5\n\t" + "UMAAL r11, r4, r2, r5\n\t" "UMAAL r12, r4, r3, r5\n\t" "MOV r5, #0x0\n\t" - "UMLAL r10, r5, %[r], r6\n\t" - "UMAAL r11, r5, %[a], r6\n\t" - "UMAAL r12, r5, %[b], r6\n\t" + "UMLAL r10, r5, r0, r6\n\t" + "UMAAL r11, r5, r1, r6\n\t" + "UMAAL r12, r5, r2, r6\n\t" "UMAAL r4, r5, r3, r6\n\t" "MOV r6, #0x0\n\t" - "UMLAL r11, r6, %[r], r7\n\t" - "LDR %[r], [sp, #72]\n\t" - "UMAAL r12, r6, %[a], r7\n\t" - "ADD %[r], %[r], #0x10\n\t" - "UMAAL r4, r6, %[b], r7\n\t" + "UMLAL r11, r6, r0, r7\n\t" + "LDR r0, [sp, #72]\n\t" + "UMAAL r12, r6, r1, r7\n\t" + "ADD r0, r0, #0x10\n\t" + "UMAAL r4, r6, r2, r7\n\t" "SUB lr, lr, #0x10\n\t" "UMAAL r5, r6, r3, r7\n\t" - "LDM %[r], {%[r], %[a], %[b], r3}\n\t" + "LDM r0, {r0, r1, r2, r3}\n\t" "STR r6, [sp, #64]\n\t" "LDM lr!, {r6}\n\t" "MOV r7, #0x0\n\t" - "UMLAL r8, r7, %[r], r6\n\t" - "UMAAL r9, r7, %[a], r6\n\t" + "UMLAL r8, r7, r0, r6\n\t" + "UMAAL r9, r7, r1, r6\n\t" "STR r8, [sp, #16]\n\t" - "UMAAL r10, r7, %[b], r6\n\t" + "UMAAL r10, r7, r2, r6\n\t" "UMAAL r11, r7, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r8, #0x0\n\t" - 
"UMLAL r9, r8, %[r], r6\n\t" - "UMAAL r10, r8, %[a], r6\n\t" + "UMLAL r9, r8, r0, r6\n\t" + "UMAAL r10, r8, r1, r6\n\t" "STR r9, [sp, #20]\n\t" - "UMAAL r11, r8, %[b], r6\n\t" + "UMAAL r11, r8, r2, r6\n\t" "UMAAL r12, r8, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r9, #0x0\n\t" - "UMLAL r10, r9, %[r], r6\n\t" - "UMAAL r11, r9, %[a], r6\n\t" + "UMLAL r10, r9, r0, r6\n\t" + "UMAAL r11, r9, r1, r6\n\t" "STR r10, [sp, #24]\n\t" - "UMAAL r12, r9, %[b], r6\n\t" + "UMAAL r12, r9, r2, r6\n\t" "UMAAL r4, r9, r3, r6\n\t" "LDM lr!, {r6}\n\t" "MOV r10, #0x0\n\t" - "UMLAL r11, r10, %[r], r6\n\t" - "UMAAL r12, r10, %[a], r6\n\t" + "UMLAL r11, r10, r0, r6\n\t" + "UMAAL r12, r10, r1, r6\n\t" "STR r11, [sp, #28]\n\t" - "UMAAL r4, r10, %[b], r6\n\t" + "UMAAL r4, r10, r2, r6\n\t" "UMAAL r5, r10, r3, r6\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r12, r7, %[r], r11\n\t" - "UMAAL r4, r7, %[a], r11\n\t" + "UMAAL r12, r7, r0, r11\n\t" + "UMAAL r4, r7, r1, r11\n\t" "LDR r6, [sp, #64]\n\t" - "UMAAL r5, r7, %[b], r11\n\t" + "UMAAL r5, r7, r2, r11\n\t" "UMAAL r6, r7, r3, r11\n\t" "LDM lr!, {r11}\n\t" - "UMAAL r4, r8, %[r], r11\n\t" - "UMAAL r5, r8, %[a], r11\n\t" - "UMAAL r6, r8, %[b], r11\n\t" + "UMAAL r4, r8, r0, r11\n\t" + "UMAAL r5, r8, r1, r11\n\t" + "UMAAL r6, r8, r2, r11\n\t" "UMAAL r7, r8, r3, r11\n\t" "LDM lr, {r11, lr}\n\t" - "UMAAL r5, r9, %[r], r11\n\t" - "UMAAL r6, r10, %[r], lr\n\t" - "UMAAL r6, r9, %[a], r11\n\t" - "UMAAL r7, r10, %[a], lr\n\t" - "UMAAL r7, r9, %[b], r11\n\t" - "UMAAL r8, r10, %[b], lr\n\t" + "UMAAL r5, r9, r0, r11\n\t" + "UMAAL r6, r10, r0, lr\n\t" + "UMAAL r6, r9, r1, r11\n\t" + "UMAAL r7, r10, r1, lr\n\t" + "UMAAL r7, r9, r2, r11\n\t" + "UMAAL r8, r10, r2, lr\n\t" "UMAAL r8, r9, r3, r11\n\t" "UMAAL r9, r10, r3, lr\n\t" "MOV r3, r12\n\t" @@ -31369,12 +33160,23 @@ static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", "r8", "r9", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #endif #ifdef WOLFSSL_SP_NO_UMAAL +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) * * r Result of squaring. @@ -31382,10 +33184,16 @@ static void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * m Modulus (prime). * mp Montgomery multiplier. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -31736,11 +33544,22 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #else +#ifdef __IAR_SYSTEMS_ICC__ +#pragma inline=never +#endif /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) * * r Result of squaring. @@ -31748,57 +33567,63 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * m Modulus (prime). * mp Montgomery multiplier. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" "STR %[r], [sp, #64]\n\t" - "LDM %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" - "UMULL r9, r10, %[r], %[r]\n\t" - "UMULL r11, r12, %[r], %[a]\n\t" + "LDM %[a], {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" + "UMULL r9, r10, r0, r0\n\t" + "UMULL r11, r12, r0, r1\n\t" "ADDS r11, r11, r11\n\t" "MOV lr, #0x0\n\t" "UMAAL r10, r11, lr, lr\n\t" "STM sp, {r9, r10}\n\t" "MOV r8, lr\n\t" - "UMAAL r8, r12, %[r], r2\n\t" + "UMAAL r8, r12, r0, r2\n\t" "ADCS r8, r8, r8\n\t" - "UMAAL r8, r11, %[a], %[a]\n\t" - "UMULL r9, r10, %[r], r3\n\t" - "UMAAL r9, r12, %[a], r2\n\t" + "UMAAL r8, r11, r1, r1\n\t" + "UMULL r9, r10, r0, r3\n\t" + "UMAAL r9, r12, r1, r2\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STRD r8, r9, [sp, #8]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r10, %[r], r4\n\t" - "UMAAL r9, r12, %[a], r3\n\t" + "UMAAL r9, r10, r0, r4\n\t" + "UMAAL r9, r12, r1, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r2, r2\n\t" "STR r9, [sp, #16]\n\t" - "UMULL r9, r8, %[r], r5\n\t" - "UMAAL r9, r12, %[a], r4\n\t" + "UMULL r9, r8, r0, r5\n\t" + "UMAAL r9, r12, r1, r4\n\t" "UMAAL r9, r10, r2, r3\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, lr, lr\n\t" "STR r9, [sp, #20]\n\t" "MOV r9, lr\n\t" - "UMAAL r9, r8, %[r], r6\n\t" - "UMAAL r9, r12, %[a], r5\n\t" + "UMAAL r9, r8, r0, r6\n\t" + "UMAAL r9, r12, r1, r5\n\t" "UMAAL r9, r10, r2, r4\n\t" "ADCS r9, r9, r9\n\t" "UMAAL r9, r11, r3, r3\n\t" "STR r9, [sp, #24]\n\t" - "UMULL %[r], r9, %[r], r7\n\t" - "UMAAL %[r], r8, %[a], r6\n\t" - "UMAAL %[r], r12, r2, r5\n\t" - "UMAAL %[r], 
r10, r3, r4\n\t" - "ADCS %[r], %[r], %[r]\n\t" - "UMAAL %[r], r11, lr, lr\n\t" + "UMULL r0, r9, r0, r7\n\t" + "UMAAL r0, r8, r1, r6\n\t" + "UMAAL r0, r12, r2, r5\n\t" + "UMAAL r0, r10, r3, r4\n\t" + "ADCS r0, r0, r0\n\t" + "UMAAL r0, r11, lr, lr\n\t" /* R[7] = r0 */ - "UMAAL r9, r8, %[a], r7\n\t" + "UMAAL r9, r8, r1, r7\n\t" "UMAAL r9, r10, r2, r6\n\t" "UMAAL r12, r9, r3, r5\n\t" "ADCS r12, r12, r12\n\t" @@ -31816,10 +33641,10 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "ADCS r3, r2, r2\n\t" "UMAAL r10, r3, r5, r5\n\t" /* R[10] = r10 */ - "MOV %[a], lr\n\t" - "UMAAL %[a], r8, r4, r7\n\t" - "UMAAL %[a], r9, r5, r6\n\t" - "ADCS r4, %[a], %[a]\n\t" + "MOV r1, lr\n\t" + "UMAAL r1, r8, r4, r7\n\t" + "UMAAL r1, r9, r5, r6\n\t" + "ADCS r4, r1, r1\n\t" "UMAAL r3, r4, lr, lr\n\t" /* R[11] = r3 */ "UMAAL r8, r9, r5, r7\n\t" @@ -31838,7 +33663,7 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit /* R[15] = r7 */ "MOV lr, sp\n\t" "ADD lr, lr, #0x1c\n\t" - "STM lr!, {%[r], r12}\n\t" + "STM lr!, {r0, r12}\n\t" "STM lr!, {r11}\n\t" "STM lr!, {r10}\n\t" "STM lr!, {r3, r4, r8, r9}\n\t" @@ -31971,8 +33796,16 @@ static void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #endif @@ -32079,16 +33912,22 @@ static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_256_cmp_8(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x1c\n\t" "\n" @@ -32221,12 +34060,18 @@ static sp_int32 sp_256_cmp_8(const sp_digit* a_p, const sp_digit* b_p) * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -32265,12 +34110,18 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_d * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -32321,11 +34172,17 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_d * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -32428,11 +34285,17 @@ static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
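/* A sketch of the masked conditional subtract above: the mask m is 0 or
 * all-ones, so b & m is either 0 or b and no branch is taken; the return
 * value is the final borrow. Assumes int64_t right shift is arithmetic;
 * illustrative, not the wolfSSL API. */
#include <stdint.h>

static uint32_t cond_sub_ct(uint32_t* r, const uint32_t* a,
                            const uint32_t* b, uint32_t m, int n)
{
    int64_t t = 0;
    int i;
    for (i = 0; i < n; i++) {
        t += (int64_t)a[i] - (b[i] & m);
        r[i] = (uint32_t)t;
        t >>= 32;                   /* -1 while a borrow is outstanding */
    }
    return (uint32_t)t;             /* 0, or all-ones if a < (b & m) */
}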
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -32518,9 +34381,15 @@ static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -32658,8 +34527,16 @@ static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp : : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #ifdef WOLFSSL_SP_NO_UMAAL @@ -32669,11 +34546,17 @@ static void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -32776,11 +34659,17 @@ static void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_di * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
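/* For context, a sketch of deriving mp = -m[0]^-1 mod 2^32 by Hensel
 * lifting. For the P-256 prime 2^256 - 2^224 + 2^192 + 2^96 - 1 the low
 * limb is 0xffffffff, giving mp == 1, which is why the dedicated
 * reduction above can hard-code the modulus and ignore its m and mp
 * parameters. Illustrative helper, not the wolfSSL API. */
#include <stdint.h>

static uint32_t mont_mp(uint32_t m0)       /* m0 = low limb, must be odd */
{
    uint32_t x = m0;                       /* inverse of m0 modulo 2^3 */
    int i;
    for (i = 0; i < 4; i++)                /* 3 -> 6 -> 12 -> 24 -> 48 bits */
        x *= 2U - m0 * x;                  /* Newton step, all mod 2^32 */
    return (uint32_t)0U - x;               /* mp = -m0^-1 mod 2^32 */
}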
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -32907,11 +34796,17 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV lr, #0x0\n\t" @@ -32953,7 +34848,11 @@ static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Double a Montgomery form number (r = a + a % m). @@ -32962,10 +34861,16 @@ static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a Number to double in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r2, #0x0\n\t" @@ -33003,7 +34908,11 @@ static void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Triple a Montgomery form number (r = a + a + a % m). @@ -33012,10 +34921,16 @@ static void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a Number to triple in Montgomery form. * m Modulus (prime). 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -33085,7 +35000,11 @@ static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3", "r12" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Subtract two Montgomery form numbers (r = a - b % m). @@ -33095,11 +35014,17 @@ static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * b Number to subtract with in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV lr, #0x0\n\t" @@ -33139,7 +35064,11 @@ static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) @@ -33148,11 +35077,17 @@ static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a Number to divide. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mont_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_256_mont_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r4, r5, r6, r7}\n\t" @@ -36706,9 +38641,15 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, * * a A single precision integer. 
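/* A sketch of the branch-free shape shared by the mont_add/dbl/tpl/sub
 * routines above: compute a + b - m with a signed carry, then add m back
 * under an all-ones mask if the subtraction borrowed. Assumes inputs
 * already reduced below m and an arithmetic right shift on int64_t; kept
 * generic where the asm hard-codes the P-256 modulus (hence the unused m
 * parameters and the (void) casts). */
#include <stdint.h>

static void mod_add_ct(uint32_t* r, const uint32_t* a, const uint32_t* b,
                       const uint32_t* m, int n)
{
    int64_t t = 0;
    uint64_t c = 0;
    uint32_t mask;
    int i;
    for (i = 0; i < n; i++) {              /* r = a + b - m */
        t += (int64_t)a[i] + b[i] - m[i];
        r[i] = (uint32_t)t;
        t >>= 32;                          /* keeps the sign of the borrow */
    }
    mask = (uint32_t)t;                    /* 0, or all-ones on borrow */
    for (i = 0; i < n; i++) {              /* r += m & mask */
        c += (uint64_t)r[i] + (m[i] & mask);
        r[i] = (uint32_t)c;
        c >>= 32;
    }
}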
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_add_one_8(sp_digit* a_p) +#else +static void sp_256_add_one_8(sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r1, r2, r3, r4}\n\t" @@ -37104,10 +39045,16 @@ int sp_ecc_secret_gen_256_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -37143,10 +39090,16 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -37179,11 +39132,17 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -37226,11 +39185,17 @@ static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. 
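/* A sketch of the single-digit multiply the mul_d routines above unroll
 * with UMULL/UMAAL: each step adds the 64-bit product to the running
 * carry, and the final carry becomes the extra top limb. Illustrative
 * name, not the wolfSSL API. */
#include <stdint.h>

static uint32_t mul_d(uint32_t* r, const uint32_t* a, uint32_t b, int n)
{
    uint64_t t = 0;
    int i;
    for (i = 0; i < n; i++) {
        t += (uint64_t)a[i] * b;    /* product plus incoming carry */
        r[i] = (uint32_t)t;
        t >>= 32;
    }
    return (uint32_t)t;             /* the top word, r[n] in the asm */
}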
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -37290,11 +39255,17 @@ static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -37349,11 +39320,17 @@ static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -38058,11 +40035,17 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -38099,11 +40082,17 @@ static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -38129,10 +40118,16 @@ static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* } #endif /* WOLFSSL_SP_SMALL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_rshift1_8(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -38173,11 +40168,17 @@ static void sp_256_rshift1_8(sp_digit* r_p, const sp_digit* a_p) * a Number to divide. * m Modulus. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_256_div2_mod_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -38237,9 +40238,15 @@ static void sp_256_div2_mod_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static int sp_256_num_bits_8(const sp_digit* a_p) +#else +static int sp_256_num_bits_8(const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR r1, [%[a], #28]\n\t" @@ -39439,11 +41446,17 @@ static const sp_digit p384_b[12] = { * a A single precision integer. * b A single precision integer. 
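The num_bits helpers return the index of the highest set bit plus one; the assembly starts from the top limb (offset #28 here, i.e. limb 7) and stops at the first non-zero word. A portable C model under those assumptions, names hypothetical:

#include <stdint.h>

/* Hypothetical model of sp_256_num_bits_8-style functions. */
static int num_bits_words(const uint32_t* a, int words)
{
    int i;
    for (i = words - 1; i >= 0; i--) {
        if (a[i] != 0) {
            int b = 32;
            while ((a[i] & (1U << (b - 1))) == 0)  /* find the top set bit */
                b--;
            return i * 32 + b;
        }
    }
    return 0;   /* a == 0 */
}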
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x60\n\t" @@ -39516,11 +41529,17 @@ static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x30\n\t" @@ -40538,10 +42557,16 @@ static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x60\n\t" @@ -40638,10 +42663,16 @@ static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x30\n\t" @@ -41335,18 +43366,24 @@ static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x30\n\t" "\n" "L_sp_384_add_12_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -41377,11 +43414,17 @@ static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -41721,12 +43764,18 @@ static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -41765,12 +43814,18 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * b A single precision number to subtract. * m Mask value to apply. 
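The looped adders above keep the inter-block carry in r3 as 0 or 1: `ADDS r3, r3, #0xffffffff` converts a saved 1 back into the carry flag (1 + 0xffffffff wraps to zero with carry out) while a saved 0 leaves it clear, so the ADCS chain resumes exactly where the previous four-word block stopped. #0x-1 and #0xffffffff encode the same word; the hunks switch to the spelling every assembler accepts. In C the whole loop reduces to this sketch (uint32_t for sp_digit, helper name made up):

#include <stdint.h>

/* Hypothetical model of the word-loop adders: r = a + b, returns the carry. */
static uint32_t add_words_with_carry(uint32_t* r, const uint32_t* a,
                                     const uint32_t* b, int n)
{
    uint32_t c = 0;
    int i;
    for (i = 0; i < n; i++) {
        uint64_t t = (uint64_t)a[i] + b[i] + c;
        r[i] = (uint32_t)t;
        c = (uint32_t)(t >> 32);   /* 0 or 1, like the saved carry flag */
    }
    return c;
}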
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -41834,11 +43889,17 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -41973,11 +44034,17 @@ static void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p, sp_digit m * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -42225,16 +44292,22 @@ static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_384_cmp_12(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_384_cmp_12(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x2c\n\t" "\n" @@ -42449,12 +44522,18 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ sp_digit o; @@ -42468,11 +44547,17 @@ static void sp_384_mont_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a Number to double in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_dbl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ sp_digit o; @@ -42486,11 +44571,17 @@ static void sp_384_mont_dbl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a Number to triple in Montgomery form. * m Modulus (prime). 
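sp_384_cmp_12 builds its negative/zero/positive answer from all-ones masks instead of branches: once the first differing limb (scanning from the top) fixes the result, a mask blocks every later update. A C model of the idea; a compiler is free to reintroduce branches here, so treat it as illustration only, with hypothetical names:

#include <stdint.h>

/* Hypothetical model of the cmp helpers: +1, 0 or -1. */
static int32_t cmp_words_ct(const uint32_t* a, const uint32_t* b, int n)
{
    int32_t r = 0;
    uint32_t done = 0;   /* becomes all ones once a difference is seen */
    int i;
    for (i = n - 1; i >= 0; i--) {
        uint32_t gt = (uint32_t)0 - (uint32_t)(a[i] > b[i]);
        uint32_t lt = (uint32_t)0 - (uint32_t)(a[i] < b[i]);
        r += (int32_t)(1 & gt & ~done);   /* first difference sets +1 ... */
        r -= (int32_t)(1 & lt & ~done);   /* ... or -1, later ones ignored */
        done |= gt | lt;
    }
    return r;
}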
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_tpl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ sp_digit o; @@ -42507,11 +44598,17 @@ static void sp_384_mont_tpl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -42548,11 +44645,17 @@ static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -42594,12 +44697,18 @@ static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit * b A single precision number to add. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -42607,7 +44716,7 @@ static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_ "MOV r4, #0x0\n\t" "\n" "L_sp_384_cond_add_12_words_%=:\n\t" - "ADDS r5, r5, #0x-1\n\t" + "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" "AND r7, r7, %[m]\n\t" @@ -42638,12 +44747,18 @@ static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * b A single precision number to add. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -42705,12 +44820,18 @@ static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * b Number to subtract with in Montgomery form. * m Modulus (prime). 
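The cond_add and cond_sub helpers take m as a precomputed mask, either 0 or 0xffffffff, so `AND r7, r7, %[m]` selects the operand or zero and the carry chain runs identically in both cases, with no branch on the condition. The C equivalent, under the same mask convention and with a hypothetical name:

#include <stdint.h>

/* Hypothetical model of the cond_add helpers: r = a + (b & m). */
static uint32_t cond_add_words(uint32_t* r, const uint32_t* a,
                               const uint32_t* b, uint32_t m, int n)
{
    uint32_t c = 0;
    int i;
    for (i = 0; i < n; i++) {
        uint64_t t = (uint64_t)a[i] + (b[i] & m) + c;   /* m picks b or 0 */
        r[i] = (uint32_t)t;
        c = (uint32_t)(t >> 32);
    }
    return c;   /* final carry out */
}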
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mont_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ sp_digit o; @@ -42721,10 +44842,16 @@ static void sp_384_mont_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi #ifdef WOLFSSL_SP_SMALL #else #endif /* WOLFSSL_SP_SMALL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_rshift1_12(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3}\n\t" @@ -46360,9 +48487,15 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, * * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_add_one_12(sp_digit* a_p) +#else +static void sp_384_add_one_12(sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r1, r2, r3, r4}\n\t" @@ -46764,10 +48897,16 @@ int sp_ecc_secret_gen_384_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -46803,10 +48942,16 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer and result. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -46846,11 +48991,17 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -46893,11 +49044,17 @@ static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -46977,11 +49134,17 @@ static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -47036,11 +49199,17 @@ static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
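Both div_384_word_12 variants approximate one 64-by-32 division from 16-bit partial quotients or shift-and-subtract steps, which is why the comment warns the answer may be one too large; callers correct it by comparison afterwards. The exact operation being approximated, assuming d1 < div so the quotient fits in 32 bits:

#include <stdint.h>

/* Exact reference for the div_*_word helpers (the asm approximates this). */
static uint32_t div_word_ref(uint32_t d1, uint32_t d0, uint32_t div)
{
    uint64_t d = ((uint64_t)d1 << 32) | d0;   /* the two-limb dividend */
    return (uint32_t)(d / div);
}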
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -47715,11 +49884,17 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W * a Number to divide. * m Modulus. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_384_div2_mod_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r4}\n\t" @@ -47817,9 +49992,15 @@ static void sp_384_div2_mod_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static int sp_384_num_bits_12(const sp_digit* a_p) +#else +static int sp_384_num_bits_12(const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR r1, [%[a], #44]\n\t" @@ -49117,11 +51298,17 @@ static const sp_digit p521_b[17] = { * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x88\n\t" @@ -49197,11 +51384,17 @@ static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -51233,10 +53426,16 @@ static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x88\n\t" @@ -51336,10 +53535,16 @@ static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -52617,18 +54822,24 @@ static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x40\n\t" "\n" "L_sp_521_add_17_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -52644,7 +54855,7 @@ static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit #else "BNE.N L_sp_521_add_17_word_%=\n\t" #endif - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a], {r4}\n\t" "LDM %[b], {r8}\n\t" "ADCS r4, r4, r8\n\t" @@ -52665,11 +54876,17 @@ static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -52936,12 +55153,18 @@ static int sp_521_point_to_ecc_point_17(const sp_point_521* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -52980,12 +55203,18 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_ * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -53065,9 +55294,15 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_ * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x44\n\t" @@ -53177,8 +55412,16 @@ static void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p, sp_digit m : : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)mp_p; +#else + (void)mp; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #ifdef WOLFSSL_SP_NO_UMAAL @@ -53188,11 +55431,17 @@ static void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p, sp_digit m * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -53448,11 +55697,17 @@ static void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_d * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
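The mont_reduce routines are word-wise Montgomery reduction; the umlal/umaal encodings just fuse the multiply, accumulate and carry into single instructions. A C sketch of the algorithm they unroll, with uint32_t for sp_digit and the final conditional subtraction left to the caller, as in the assembly:

#include <stdint.h>

/* Hypothetical sketch: a has 2*n limbs, mp is -m^-1 mod 2^32. */
static void mont_reduce_ref(uint32_t* a, const uint32_t* m, uint32_t mp, int n)
{
    uint32_t o = 0;   /* overflow out of the top limb */
    int i, j;
    for (i = 0; i < n; i++) {
        uint32_t mu = a[i] * mp;   /* chosen so adding mu*m zeroes limb i */
        uint32_t c = 0;
        uint64_t t;
        for (j = 0; j < n; j++) {
            t = (uint64_t)mu * m[j] + a[i + j] + c;
            a[i + j] = (uint32_t)t;
            c = (uint32_t)(t >> 32);
        }
        t = (uint64_t)a[i + n] + c + o;
        a[i + n] = (uint32_t)t;
        o = (uint32_t)(t >> 32);
    }
    /* result is a[n..2n-1]; the caller subtracts m once, keyed off o */
}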
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -53803,16 +56058,22 @@ static void sp_521_mont_inv_17(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_521_cmp_17(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_521_cmp_17(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x40\n\t" "\n" @@ -54082,11 +56343,17 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_521_mont_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" @@ -54153,7 +56420,11 @@ static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Double a Montgomery form number (r = a + a % m). @@ -54162,10 +56433,16 @@ static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a Number to double in Montgomery form. * m Modulus (prime). 
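sp_521_mont_add_17 and its dbl/tpl/sub siblings share one recipe: a full-width pass, then the overflow folded back in with a masked correction by the modulus. The 521-bit code folds the overflow per the prime's 2^521 - 1 shape; the generic pattern, roughly as the smaller curves use it, looks like this sketch (hypothetical name, uint32_t limbs):

#include <stdint.h>

/* Hypothetical sketch of the generic mont_add shape: r = a + b mod m. */
static void mont_add_ref(uint32_t* r, const uint32_t* a, const uint32_t* b,
                         const uint32_t* m, int n)
{
    uint32_t c = 0, mask;
    int i;
    for (i = 0; i < n; i++) {            /* r = a + b, tracking the carry */
        uint64_t t = (uint64_t)a[i] + b[i] + c;
        r[i] = (uint32_t)t;
        c = (uint32_t)(t >> 32);
    }
    mask = (uint32_t)0 - c;              /* all ones when the add overflowed */
    c = 0;
    for (i = 0; i < n; i++) {            /* conditionally subtract m */
        uint64_t t = (uint64_t)r[i] - (m[i] & mask) - c;
        r[i] = (uint32_t)t;
        c = (uint32_t)((t >> 32) & 1);
    }
}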
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_dbl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_521_mont_dbl_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r2, #0x0\n\t" @@ -54223,7 +56500,11 @@ static void sp_521_mont_dbl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Triple a Montgomery form number (r = a + a + a % m). @@ -54232,10 +56513,16 @@ static void sp_521_mont_dbl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a Number to triple in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r2, #0x0\n\t" @@ -54313,7 +56600,11 @@ static void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } /* Subtract two Montgomery form numbers (r = a - b % m). @@ -54323,11 +56614,17 @@ static void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi * b Number to subtract with in Montgomery form. * m Modulus (prime). 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mont_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" @@ -54395,13 +56692,23 @@ static void sp_521_mont_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG (void)m_p; +#else + (void)m; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_rshift1_17(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_521_rshift1_17(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3}\n\t" @@ -58679,9 +60986,15 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, * * a A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_add_one_17(sp_digit* a_p) +#else +static void sp_521_add_one_17(sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r1, r2, r3, r4}\n\t" @@ -59085,11 +61398,17 @@ int sp_ecc_secret_gen_521_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, #endif /* HAVE_ECC_DHE */ #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_rshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_521_rshift_17(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x20\n\t" @@ -59184,11 +61503,17 @@ static void sp_521_rshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_lshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_521_lshift_17(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x1f\n\t" @@ -59300,11 +61625,17 @@ static void sp_521_lshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_lshift_34(sp_digit* r_p, const sp_digit* a_p, byte n_p) +#else +static void sp_521_lshift_34(sp_digit* r, const sp_digit* a, byte n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "RSB r7, %[n], #0x1f\n\t" @@ -59524,10 +61855,16 @@ static void sp_521_lshift_34(sp_digit* r_p, const sp_digit* a_p, byte n_p) * a A single precision integer. * b A single precision integer. 
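sp_521_rshift_17 precomputes `RSB r7, %[n], #0x20` (that is, 32 - n) so each output word is stitched from two neighbours: r[i] = (a[i] >> n) | (a[i+1] << (32 - n)). A C model with a made-up name; the n == 0 guard avoids the undefined 32-bit shift that ARM's register-shift semantics make harmless in the assembly:

#include <stdint.h>

/* Hypothetical model of the variable right shift: r = a >> n, 0 <= n < 32. */
static void rshift_words(uint32_t* r, const uint32_t* a, int words,
                         unsigned int n)
{
    int i;
    for (i = 0; i < words - 1; i++)
        r[i] = (n == 0) ? a[i]
                        : ((a[i] >> n) | (a[i + 1] << (32 - n)));
    r[words - 1] = a[words - 1] >> n;   /* top word shifts in zeros */
}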
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -59568,10 +61905,16 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -59622,11 +61965,17 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -59669,11 +62018,17 @@ static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -59778,11 +62133,17 @@ static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -59837,11 +62198,17 @@ static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -60547,11 +62914,17 @@ int sp_ecc_sign_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r11, #0x0\n\t" @@ -60593,11 +62966,17 @@ static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -60647,11 +63026,17 @@ static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a Number to divide. * m Modulus. 
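The div2_mod helpers halve a value modulo an odd modulus: an even a is simply shifted, while an odd a first has m added (the mask trick again) so the sum is even and the shift divides exactly; the carry out of that add becomes the new top bit. A C model with hypothetical names, uint32_t for sp_digit:

#include <stdint.h>

/* Hypothetical model of the div2_mod routines: r = a / 2 mod m, m odd. */
static void div2_mod_ref(uint32_t* r, const uint32_t* a, const uint32_t* m,
                         int n)
{
    uint32_t mask = (uint32_t)0 - (a[0] & 1);  /* all ones when a is odd */
    uint32_t c = 0;
    int i;
    for (i = 0; i < n; i++) {                  /* r = a + (m & mask) */
        uint64_t t = (uint64_t)a[i] + (m[i] & mask) + c;
        r[i] = (uint32_t)t;
        c = (uint32_t)(t >> 32);
    }
    for (i = 0; i < n - 1; i++)                /* r >>= 1 across the limbs */
        r[i] = (r[i] >> 1) | (r[i + 1] << 31);
    r[n - 1] = (r[n - 1] >> 1) | (c << 31);    /* carry becomes the top bit */
}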
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_521_div2_mod_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_521_div2_mod_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r4}\n\t" @@ -60784,9 +63169,15 @@ static void sp_521_div2_mod_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi ); } +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static int sp_521_num_bits_17(const sp_digit* a_p) +#else +static int sp_521_num_bits_17(const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR r1, [%[a], #64]\n\t" @@ -62032,11 +64423,17 @@ typedef struct sp_point_1024 { * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mul_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x40\n\t" @@ -63834,10 +66231,16 @@ static void sp_1024_mul_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_sqr_16(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x40\n\t" @@ -64981,11 +67384,17 @@ static void sp_1024_sqr_16(sp_digit* r_p, const sp_digit* a_p) * a A single precision integer. * b A single precision integer. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -65030,10 +67439,16 @@ static sp_digit sp_1024_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer and result. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -65106,11 +67521,17 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -65253,11 +67674,17 @@ SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_sub_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r3, r4, r5, r6}\n\t" @@ -65339,11 +67766,17 @@ SP_NOINLINE static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) * a A single precision integer. 
* b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" @@ -65414,10 +67847,16 @@ static void sp_1024_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b * r A single precision integer. * a A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_sqr_32(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x100\n\t" @@ -65600,10 +68039,16 @@ static const sp_point_1024 p1024_base = { * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -65643,12 +68088,18 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r8, #0x0\n\t" @@ -65687,12 +68138,18 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -65824,18 +68281,24 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp * a A single precision integer. * b A single precision integer. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r3, #0x0\n\t" "ADD r12, %[a], #0x80\n\t" "\n" "L_sp_1024_add_32_word_%=:\n\t" - "ADDS r3, r3, #0x-1\n\t" + "ADDS r3, r3, #0xffffffff\n\t" "LDM %[a]!, {r4, r5, r6, r7}\n\t" "LDM %[b]!, {r8, r9, r10, r11}\n\t" "ADCS r4, r4, r8\n\t" @@ -65867,11 +68330,17 @@ static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digi * a A single precision integer. * b A single precision digit. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -65914,11 +68383,17 @@ static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * a A single precision integer. * b A single precision digit. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) +#else +static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* A[0] * B */ @@ -66098,11 +68573,17 @@ static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r8, %[div], #16\n\t" @@ -66157,11 +68638,17 @@ static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * * Note that this is an approximate div. It may give an answer 1 larger. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) +#else +static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LSR r5, %[div], #1\n\t" @@ -66251,16 +68738,22 @@ static void sp_1024_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_int32 sp_1024_cmp_32(const sp_digit* a_p, const sp_digit* b_p) +#else +static sp_int32 sp_1024_cmp_32(const sp_digit* a, const sp_digit* b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r2, #0x-1\n\t" + "MOV r2, #0xffffffff\n\t" "MOV r8, #0x1\n\t" "MOV r7, #0x0\n\t" - "MOV r3, #0x-1\n\t" + "MOV r3, #0xffffffff\n\t" #ifdef WOLFSSL_SP_SMALL "MOV r6, #0x7c\n\t" "\n" @@ -66977,11 +69470,17 @@ static int sp_1024_point_to_ecc_point_32(const sp_point_1024* p, ecc_point* pm) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDR lr, [%[m]]\n\t" @@ -67281,11 +69780,17 @@ static void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +#else +static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* i = 0 */ @@ -67633,12 +70138,18 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, * b Second number to add in Montgomery form. * m Modulus (prime). 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -67805,11 +70316,17 @@ static void sp_1024_mont_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * a Number to double in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_dbl_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -67960,11 +70477,17 @@ static void sp_1024_mont_dbl_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * a Number to triple in Montgomery form. * m Modulus (prime). */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_tpl_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +#else +static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r12, #0x0\n\t" @@ -68271,12 +70794,18 @@ static void sp_1024_mont_tpl_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * b Number to subtract with in Montgomery form. * m Modulus (prime). 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_mont_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +#else +static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a]!, {r4, r5, r6, r7}\n\t" @@ -68440,12 +70969,18 @@ static void sp_1024_mont_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * b A single precision number to add. * m Mask value to apply. */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r5, #0x0\n\t" @@ -68453,7 +70988,7 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp "MOV r4, #0x0\n\t" "\n" "L_sp_1024_cond_add_32_words_%=:\n\t" - "ADDS r5, r5, #0x-1\n\t" + "ADDS r5, r5, #0xffffffff\n\t" "LDR r6, [%[a], r4]\n\t" "LDR r7, [%[b], r4]\n\t" "AND r7, r7, %[m]\n\t" @@ -68484,12 +71019,18 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. 
*/ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +#else +static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "MOV r10, #0x0\n\t" @@ -68614,10 +71155,16 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp } #endif /* WOLFSSL_SP_SMALL */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG static void sp_1024_rshift1_32(sp_digit* r_p, const sp_digit* a_p) +#else +static void sp_1024_rshift1_32(sp_digit* r, const sp_digit* a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "LDM %[a], {r2, r3}\n\t" diff --git a/wolfcrypt/src/sp_x86_64_asm.S b/wolfcrypt/src/sp_x86_64_asm.S index 4c423bc82..6879391de 100644 --- a/wolfcrypt/src/sp_x86_64_asm.S +++ b/wolfcrypt/src/sp_x86_64_asm.S @@ -1,5 +1,5 @@ -/* sp_x86_64_asm - * +/* sp_x86_64_asm.S */ +/* * Copyright (C) 2006-2023 wolfSSL Inc. * * This file is part of wolfSSL. diff --git a/wolfcrypt/src/sp_x86_64_asm.asm b/wolfcrypt/src/sp_x86_64_asm.asm index 9e7523d7b..fa6558cc9 100644 --- a/wolfcrypt/src/sp_x86_64_asm.asm +++ b/wolfcrypt/src/sp_x86_64_asm.asm @@ -1,5 +1,5 @@ -; /* sp_x86_64_asm -; * +; /* sp_x86_64_asm.asm */ +; /* ; * Copyright (C) 2006-2023 wolfSSL Inc. ; * ; * This file is part of wolfSSL.
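
The hunks above are long but built from a handful of recurring idioms; portable C reference sketches of the main ones follow. First, every C-file hunk applies one dual-declaration shape, so it helps to see it once in isolation. The sketch below is hypothetical (my_add and its one-instruction body are invented for illustration): when WOLFSSL_NO_VAR_ASSIGN_REG is not defined, the parameters take _p names and are immediately pinned to fixed registers; when it is defined (for compilers that reject GCC-style register bindings on variables), the original parameter names are kept and the inline-asm operand constraints choose the registers instead.

#include <stdint.h>

#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
static uint32_t my_add(uint32_t a_p, uint32_t b_p)
#else
static uint32_t my_add(uint32_t a, uint32_t b)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    /* Pin the parameters to r0/r1 so the asm body can rely on them. */
    register uint32_t a __asm__ ("r0") = (uint32_t)a_p;
    register uint32_t b __asm__ ("r1") = (uint32_t)b_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Toy body; the "+r"/"r" constraints are what keep the fallback
         * path working when a and b are not pinned to registers. */
        "ADD %[a], %[a], %[b]\n\t"
        : [a] "+r" (a)
        : [b] "r" (b)
    );
    return a;
}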
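
div_521_word_17 and div_1024_word_32 both carry the note that the quotient may come out one too large. Their contract, and the single correction step that makes an approximate quotient exact, in portable C (helper names hypothetical; the uint64_t is for clarity and is exactly what the Thumb-2 code avoids):

#include <stdint.h>

/* (d1:d0) / div.  Callers are assumed to arrange d1 < div, so the
 * quotient fits in a single 32-bit word. */
static uint32_t div_word_exact(uint32_t d1, uint32_t d0, uint32_t div)
{
    uint64_t d = ((uint64_t)d1 << 32) | (uint64_t)d0;
    return (uint32_t)(d / div);
}

/* An approximate quotient q that is exact or exact+1 is fixed up with
 * one multiply and one compare. */
static uint32_t div_word_fixup(uint32_t q, uint32_t d1, uint32_t d0,
                               uint32_t div)
{
    uint64_t d = ((uint64_t)d1 << 32) | (uint64_t)d0;
    if ((uint64_t)q * div > d) {
        q--;
    }
    return q;
}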
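
sp_1024_cond_sub_32 and sp_1024_cond_add_32 take a mask m that is all-ones to apply the operation and zero to skip it, so the word-by-word work is identical either way and the choice cannot be observed through timing or the memory access pattern. A sketch of the subtract case (word count parameterized here, fixed at 32 in the assembly; the exact return encoding is an assumption):

#include <stdint.h>

/* r = a - (b & m) over n words, with m == 0 or m == 0xffffffff. */
static uint32_t cond_sub_words(uint32_t* r, const uint32_t* a,
                               const uint32_t* b, uint32_t m, int n)
{
    uint64_t t;
    uint32_t borrow = 0;
    int i;

    for (i = 0; i < n; i++) {
        t = (uint64_t)a[i] - (uint64_t)(b[i] & m) - borrow;
        r[i] = (uint32_t)t;
        borrow = (uint32_t)(t >> 32) & 1U; /* 1 when the word wrapped */
    }
    return borrow; /* borrow out of the top word */
}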
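
The two sp_1024_mont_reduce_32 bodies are size/speed variants of the same word-by-word Montgomery reduction. A readable reference (not the scheduling the assembly uses), with a holding a 2n-word product, m the n-word modulus, and mp = -m^-1 mod 2^32:

#include <stdint.h>

static uint32_t mont_reduce_ref(uint32_t* a, const uint32_t* m,
                                uint32_t mp, int n)
{
    uint32_t over = 0; /* carry spilling past a[i + n] */
    int i, j;

    for (i = 0; i < n; i++) {
        /* mu is chosen so that a[i] + mu * m[0] == 0 (mod 2^32). */
        uint32_t mu = a[i] * mp;
        uint64_t t = 0;

        for (j = 0; j < n; j++) {
            t += (uint64_t)mu * m[j] + a[i + j];
            a[i + j] = (uint32_t)t;
            t >>= 32;
        }
        t += (uint64_t)a[i + n] + over;
        a[i + n] = (uint32_t)t;
        over = (uint32_t)(t >> 32);
    }

    /* The low n words are now zero; the result is the top half. */
    for (i = 0; i < n; i++) {
        a[i] = a[i + n];
    }
    /* The result may still be >= m; 'over' (0 or 1) and a compare
     * against m drive one masked conditional subtract, as above. */
    return over;
}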
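
Finally, sp_521_num_bits_17 begins with a load at byte offset 64, i.e. word 16, the word that holds bit 521, and works downward. Its contract in portable C (helper name hypothetical):

#include <stdint.h>

/* Bit length: index of the highest set bit plus one, or 0 for zero.
 * n is the word count (17 for the P-521 code above). */
static int num_bits_ref(const uint32_t* a, int n)
{
    int i, b;

    for (i = n - 1; i >= 0; i--) {
        if (a[i] != 0) {
            uint32_t w = a[i];
            for (b = 0; w != 0; b++) {
                w >>= 1;
            }
            return (i * 32) + b;
        }
    }
    return 0;
}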