ARM32 inline ASM: make all vars input when not assigning regs

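When variables are not assigned to specific registers
(WOLFSSL_NO_VAR_ASSIGN_REG defined), the compiler doesn't keep parameters
in the same registers as they were passed in if they are declared as
output ("+r") operands, so they are all declared as input ("r") operands
instead.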
This commit is contained in:
Sean Parkinson
2025-03-27 10:51:01 +10:00
parent cfab666369
commit ea677dd30d
8 changed files with 409 additions and 0 deletions

View File

@@ -420,10 +420,17 @@ void AES_invert_key(unsigned char* ks, word32 rounds)
"str r8, [%[ks]], #4\n\t"
"subs r11, r11, #1\n\t"
"bne L_AES_invert_key_mix_loop_%=\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ks] "+r" (ks), [rounds] "+r" (rounds),
[L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c),
[L_AES_ARM32_td] "+r" (L_AES_ARM32_td_c)
:
#else
:
: [ks] "r" (ks), [rounds] "r" (rounds),
[L_AES_ARM32_te] "r" (L_AES_ARM32_te_c),
[L_AES_ARM32_td] "r" (L_AES_ARM32_td_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9",
"r10", "r11"
);
@@ -956,10 +963,17 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len,
"bne L_AES_set_encrypt_key_loop_128_%=\n\t"
"\n"
"L_AES_set_encrypt_key_end_%=: \n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks),
[L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c),
[L_AES_ARM32_rcon] "+r" (L_AES_ARM32_rcon_c)
:
#else
:
: [key] "r" (key), [len] "r" (len), [ks] "r" (ks),
[L_AES_ARM32_te] "r" (L_AES_ARM32_te_c),
[L_AES_ARM32_rcon] "r" (L_AES_ARM32_rcon_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8"
);
}
@@ -1617,8 +1631,13 @@ void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks)
"eor r5, r5, r9\n\t"
"eor r6, r6, r10\n\t"
"eor r7, r7, r11\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [te] "+r" (te), [nr] "+r" (nr), [len] "+r" (len), [ks] "+r" (ks)
:
#else
:
: [te] "r" (te), [nr] "r" (nr), [len] "r" (len), [ks] "r" (ks)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr"
);
}
@@ -1884,9 +1903,15 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
"\n"
"L_AES_ECB_encrypt_end_%=: \n\t"
"pop {%[ks]}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks),
[nr] "+r" (nr), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c)
:
#else
:
: [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks),
[nr] "r" (nr), [L_AES_ARM32_te_ecb] "r" (L_AES_ARM32_te_ecb_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
);
}
@@ -2175,10 +2200,17 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
"L_AES_CBC_encrypt_end_%=: \n\t"
"pop {%[ks], r9}\n\t"
"stm r9, {r4, r5, r6, r7}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks),
[nr] "+r" (nr), [iv] "+r" (iv),
[L_AES_ARM32_te_cbc] "+r" (L_AES_ARM32_te_cbc_c)
:
#else
:
: [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks),
[nr] "r" (nr), [iv] "r" (iv),
[L_AES_ARM32_te_cbc] "r" (L_AES_ARM32_te_cbc_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
}
@@ -2468,10 +2500,17 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out,
"rev r7, r7\n\t"
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
"stm r8, {r4, r5, r6, r7}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks),
[nr] "+r" (nr), [ctr] "+r" (ctr),
[L_AES_ARM32_te_ctr] "+r" (L_AES_ARM32_te_ctr_c)
:
#else
:
: [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks),
[nr] "r" (nr), [ctr] "r" (ctr),
[L_AES_ARM32_te_ctr] "r" (L_AES_ARM32_te_ctr_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
}
@@ -3130,8 +3169,13 @@ void AES_decrypt_block(const word32* td, int nr, const byte* td4)
"eor r5, r5, r9\n\t"
"eor r6, r6, r10\n\t"
"eor r7, r7, r11\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [td] "+r" (td), [nr] "+r" (nr), [td4] "+r" (td4)
:
#else
:
: [td] "r" (td), [nr] "r" (nr), [td4] "r" (td4)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr"
);
}
@@ -3431,10 +3475,17 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
"bne L_AES_ECB_decrypt_loop_block_128_%=\n\t"
"\n"
"L_AES_ECB_decrypt_end_%=: \n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks),
[nr] "+r" (nr), [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c),
[L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c)
:
#else
:
: [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks),
[nr] "r" (nr), [L_AES_ARM32_td_ecb] "r" (L_AES_ARM32_td_ecb_c),
[L_AES_ARM32_td4] "r" (L_AES_ARM32_td4_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
}
@@ -4086,11 +4137,19 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
"\n"
"L_AES_CBC_decrypt_end_%=: \n\t"
"pop {%[ks]-r4}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks),
[nr] "+r" (nr), [iv] "+r" (iv),
[L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c),
[L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c)
:
#else
:
: [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks),
[nr] "r" (nr), [iv] "r" (iv),
[L_AES_ARM32_td_ecb] "r" (L_AES_ARM32_td_ecb_c),
[L_AES_ARM32_td4] "r" (L_AES_ARM32_td4_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r8", "r9", "r10", "r11"
);
}
@@ -4701,9 +4760,15 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"subs %[len], %[len], #16\n\t"
"add %[data], %[data], #16\n\t"
"bne L_GCM_gmult_len_start_block_%=\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len),
[L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c)
:
#else
:
: [x] "r" (x), [m] "r" (m), [data] "r" (data), [len] "r" (len),
[L_GCM_gmult_len_r] "r" (L_GCM_gmult_len_r_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10",
"r11"
);
@@ -4983,10 +5048,17 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out,
"rev r7, r7\n\t"
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
"stm r8, {r4, r5, r6, r7}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks),
[nr] "+r" (nr), [ctr] "+r" (ctr),
[L_AES_ARM32_te_gcm] "+r" (L_AES_ARM32_te_gcm_c)
:
#else
:
: [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks),
[nr] "r" (nr), [ctr] "r" (ctr),
[L_AES_ARM32_te_gcm] "r" (L_AES_ARM32_te_gcm_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
}

View File

@@ -80,8 +80,13 @@ void wc_chacha_setiv(word32* x, const byte* iv, word32 counter)
"rev lr, lr\n\t"
#endif /* BIG_ENDIAN_ORDER */
"stm r3, {r4, r12, lr}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [x] "+r" (x), [iv] "+r" (iv), [counter] "+r" (counter)
:
#else
:
: [x] "r" (x), [iv] "r" (iv), [counter] "r" (counter)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r3", "r12", "lr", "r4"
);
}
@@ -139,9 +144,15 @@ void wc_chacha_setkey(word32* x, const byte* key, word32 keySz)
"\n"
"L_chacha_arm32_setkey_same_keyb_ytes_%=: \n\t"
"stm %[x], {r4, r5, r12, lr}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz),
[L_chacha_arm32_constants] "+r" (L_chacha_arm32_constants_c)
:
#else
:
: [x] "r" (x), [key] "r" (key), [keySz] "r" (keySz),
[L_chacha_arm32_constants] "r" (L_chacha_arm32_constants_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r4", "r5"
);
}
@@ -510,8 +521,13 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len)
"\n"
"L_chacha_arm32_crypt_done_%=: \n\t"
"add sp, sp, #52\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ctx] "+r" (ctx), [c] "+r" (c), [m] "+r" (m), [len] "+r" (len)
:
#else
:
: [ctx] "r" (ctx), [c] "r" (c), [m] "r" (m), [len] "r" (len)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9",
"r10", "r11"
);
@@ -589,9 +605,15 @@ void wc_chacha_use_over(byte* over, byte* output, const byte* input, word32 len)
"b L_chacha_arm32_over_byte_loop_%=\n\t"
"\n"
"L_chacha_arm32_over_done_%=: \n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [over] "+r" (over), [output] "+r" (output), [input] "+r" (input),
[len] "+r" (len)
:
#else
:
: [over] "r" (over), [output] "r" (output), [input] "r" (input),
[len] "r" (len)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9"
);
}

View File

@@ -74,8 +74,13 @@ void fe_init()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
:
:
#else
:
:
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc"
);
}
@@ -288,8 +293,13 @@ void fe_add_sub_op()
"sbc r11, r11, #0\n\t"
"stm r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
/* Done Add-Sub */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
:
:
#else
:
:
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr"
);
}
@@ -336,8 +346,13 @@ void fe_sub_op()
"sbc lr, lr, #0\n\t"
"stm r0, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t"
/* Done Sub */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
:
:
#else
:
:
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr"
);
}
@@ -356,8 +371,13 @@ void fe_sub(fe r, const fe a, const fe b)
__asm__ __volatile__ (
"bl fe_sub_op\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
:
#else
:
: [r] "r" (r), [a] "r" (a), [b] "r" (b)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "lr"
);
@@ -406,8 +426,13 @@ void fe_add_op()
"adc lr, lr, #0\n\t"
"stm r0, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t"
/* Done Add */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
:
:
#else
:
:
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr"
);
}
@@ -426,8 +451,13 @@ void fe_add(fe r, const fe a, const fe b)
__asm__ __volatile__ (
"bl fe_add_op\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
:
#else
:
: [r] "r" (r), [a] "r" (a), [b] "r" (b)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "lr"
);
@@ -467,8 +497,13 @@ void fe_frombytes(fe out, const unsigned char* in)
"str r7, [%[out], #20]\n\t"
"str r8, [%[out], #24]\n\t"
"str r9, [%[out], #28]\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [out] "+r" (out), [in] "+r" (in)
:
#else
:
: [out] "r" (out), [in] "r" (in)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
);
}
@@ -517,8 +552,13 @@ void fe_tobytes(unsigned char* out, const fe n)
"str r7, [%[out], #20]\n\t"
"str r8, [%[out], #24]\n\t"
"str r9, [%[out], #28]\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [out] "+r" (out), [n] "+r" (n)
:
#else
:
: [out] "r" (out), [n] "r" (n)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12"
);
}
@@ -544,8 +584,13 @@ void fe_1(fe n)
"mov r8, #0\n\t"
"mov r9, #0\n\t"
"stm %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [n] "+r" (n)
:
#else
:
: [n] "r" (n)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
);
}
@@ -571,8 +616,13 @@ void fe_0(fe n)
"mov r8, #0\n\t"
"mov r9, #0\n\t"
"stm %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [n] "+r" (n)
:
#else
:
: [n] "r" (n)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
);
}
@@ -636,8 +686,13 @@ void fe_copy(fe r, const fe a)
#else
"strd r4, r5, [%[r], #24]\n\t"
#endif
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a)
:
#else
:
: [r] "r" (r), [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r4", "r5"
);
}
@@ -669,8 +724,13 @@ void fe_neg(fe r, const fe a)
"sbcs r4, lr, r4\n\t"
"sbc r5, r12, r5\n\t"
"stm %[r]!, {r2, r3, r4, r5}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a)
:
#else
:
: [r] "r" (r), [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r4", "r5", "r12", "lr"
);
}
@@ -717,8 +777,13 @@ int fe_isnonzero(const fe a)
"orr r4, r4, r6\n\t"
"orr r2, r2, r8\n\t"
"orr %[a], r2, r4\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [a] "+r" (a)
:
#else
:
: [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9",
"r12"
);
@@ -750,8 +815,13 @@ int fe_isnegative(const fe a)
"and %[a], r2, #1\n\t"
"lsr r1, r1, #31\n\t"
"eor %[a], %[a], r1\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [a] "+r" (a)
:
#else
:
: [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r1", "r2", "r3", "r4", "r5"
);
return (word32)(size_t)a;
@@ -2291,8 +2361,13 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
#else
"strd r8, r9, [%[r], #88]\n\t"
#endif
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [base] "+r" (base), [b] "+r" (b)
:
#else
:
: [r] "r" (r), [base] "r" (base), [b] "r" (b)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r3", "r10",
"r11", "r12", "lr"
);
@@ -2418,8 +2493,13 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
"and r7, r7, lr\n\t"
"stm %[r]!, {r4, r5, r6, r7}\n\t"
"sub %[base], %[base], %[b]\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [base] "+r" (base), [b] "+r" (b)
:
#else
:
: [r] "r" (r), [base] "r" (base), [b] "r" (b)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "lr"
);
@@ -2812,8 +2892,13 @@ void fe_mul_op()
"ldr r0, [sp, #36]\n\t"
"stm r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t"
"add sp, sp, #40\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
:
:
#else
:
:
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr"
);
}
@@ -2961,8 +3046,13 @@ void fe_mul_op()
/* Store */
"stm lr, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
"add sp, sp, #16\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
:
:
#else
:
:
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr"
);
}
@@ -2982,8 +3072,13 @@ void fe_mul(fe r, const fe a, const fe b)
__asm__ __volatile__ (
"bl fe_mul_op\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
:
#else
:
: [r] "r" (r), [a] "r" (a), [b] "r" (b)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "lr"
);
@@ -3266,8 +3361,13 @@ void fe_sq_op()
"ldr r0, [sp, #64]\n\t"
"stm r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t"
"add sp, sp, #0x44\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
:
:
#else
:
:
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr"
);
}
@@ -3401,8 +3501,13 @@ void fe_sq_op()
"pop {lr}\n\t"
/* Store */
"stm lr, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
:
:
#else
:
:
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr"
);
}
@@ -3421,8 +3526,13 @@ void fe_sq(fe r, const fe a)
__asm__ __volatile__ (
"bl fe_sq_op\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a)
:
#else
:
: [r] "r" (r), [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12",
"lr", "r10", "r11"
);
@@ -3501,8 +3611,13 @@ void fe_mul121666(fe r, fe a)
"adcs r8, r8, #0\n\t"
"adc r9, r9, #0\n\t"
"stm %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a)
:
#else
:
: [r] "r" (r), [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12",
"lr", "r10"
);
@@ -3567,8 +3682,13 @@ void fe_mul121666(fe r, fe a)
"adcs r8, r8, #0\n\t"
"adc r9, r9, #0\n\t"
"stm %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a)
:
#else
:
: [r] "r" (r), [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12",
"lr", "r10"
);
@@ -3964,8 +4084,13 @@ int curve25519(byte* r, const byte* n, const byte* a)
"bl fe_mul_op\n\t"
"mov r0, #0\n\t"
"add sp, sp, #0xbc\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [n] "+r" (n), [a] "+r" (a)
:
#else
:
: [r] "r" (r), [n] "r" (n), [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r3", "r12", "lr"
);
@@ -4284,8 +4409,13 @@ int curve25519(byte* r, const byte* n, const byte* a)
"stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
"mov r0, #0\n\t"
"add sp, sp, #0xc0\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [n] "+r" (n), [a] "+r" (a)
:
#else
:
: [r] "r" (r), [n] "r" (n), [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r3", "r12", "lr"
);
@@ -4465,8 +4595,13 @@ void fe_invert(fe r, const fe a)
"ldr %[a], [sp, #132]\n\t"
"ldr %[r], [sp, #128]\n\t"
"add sp, sp, #0x88\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a)
:
#else
:
: [r] "r" (r), [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8",
"r9", "r10", "r11"
);
@@ -4792,8 +4927,13 @@ void fe_sq2(fe r, const fe a)
"ldr r0, [sp, #64]\n\t"
"stm r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t"
"add sp, sp, #0x44\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a)
:
#else
:
: [r] "r" (r), [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr"
);
}
@@ -4977,8 +5117,13 @@ void fe_sq2(fe r, const fe a)
"stm r12, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
"mov r0, r12\n\t"
"mov r1, lr\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a)
:
#else
:
: [r] "r" (r), [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr"
);
}
@@ -5154,8 +5299,13 @@ void fe_pow22523(fe r, const fe a)
"ldr %[a], [sp, #100]\n\t"
"ldr %[r], [sp, #96]\n\t"
"add sp, sp, #0x68\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a)
:
#else
:
: [r] "r" (r), [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8",
"r9", "r10", "r11"
);
@@ -5191,8 +5341,13 @@ void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p)
"add r0, r0, #0x40\n\t"
"bl fe_mul_op\n\t"
"add sp, sp, #8\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [p] "+r" (p)
:
#else
:
: [r] "r" (r), [p] "r" (p)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr", "r2", "r3", "r12", "r4", "r5", "r6", "r7", "r8",
"r9", "r10", "r11"
);
@@ -5233,8 +5388,13 @@ void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p)
"add r0, r0, #0x60\n\t"
"bl fe_mul_op\n\t"
"add sp, sp, #8\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [p] "+r" (p)
:
#else
:
: [r] "r" (r), [p] "r" (p)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "lr", "r2", "r3", "r12", "r4", "r5", "r6", "r7", "r8",
"r9", "r10", "r11"
);
@@ -5287,8 +5447,13 @@ void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p)
"mov r1, r0\n\t"
"bl fe_sub_op\n\t"
"add sp, sp, #8\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [p] "+r" (p)
:
#else
:
: [r] "r" (r), [p] "r" (p)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "lr"
);
@@ -5380,8 +5545,13 @@ void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
"add r1, r0, #32\n\t"
"bl fe_add_sub_op\n\t"
"add sp, sp, #12\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
:
#else
:
: [r] "r" (r), [p] "r" (p), [q] "r" (q)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "lr"
);
@@ -5474,8 +5644,13 @@ void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
"add r0, r0, #32\n\t"
"bl fe_add_sub_op\n\t"
"add sp, sp, #12\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
:
#else
:
: [r] "r" (r), [p] "r" (p), [q] "r" (q)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "lr"
);
@@ -5568,8 +5743,13 @@ void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
"add r0, r0, #32\n\t"
"bl fe_add_sub_op\n\t"
"add sp, sp, #44\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
:
#else
:
: [r] "r" (r), [p] "r" (p), [q] "r" (q)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "lr"
);
@@ -5662,8 +5842,13 @@ void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
"add r0, r0, #0x40\n\t"
"bl fe_add_sub_op\n\t"
"add sp, sp, #44\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
:
#else
:
: [r] "r" (r), [p] "r" (p), [q] "r" (q)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "lr"
);
@@ -6457,8 +6642,13 @@ void sc_reduce(byte* s)
"ldr %[s], [sp, #52]\n\t"
"stm %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
"add sp, sp, #56\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [s] "+r" (s)
:
#else
:
: [s] "r" (s)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9",
"r10", "r11", "r12", "lr"
);
@@ -7123,8 +7313,13 @@ void sc_reduce(byte* s)
"ldr %[s], [sp, #52]\n\t"
"stm %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
"add sp, sp, #56\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [s] "+r" (s)
:
#else
:
: [s] "r" (s)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9",
"r10", "r11", "r12", "lr"
);
@@ -8280,8 +8475,13 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
"str r8, [%[s], #24]\n\t"
"str r9, [%[s], #28]\n\t"
"add sp, sp, #0x50\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c)
:
#else
:
: [s] "r" (s), [a] "r" (a), [b] "r" (b), [c] "r" (c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r12", "lr"
);
@@ -9079,8 +9279,13 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
"str r8, [%[s], #24]\n\t"
"str r9, [%[s], #28]\n\t"
"add sp, sp, #0x50\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c)
:
#else
:
: [s] "r" (s), [a] "r" (a), [b] "r" (b), [c] "r" (c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r12", "lr"
);

View File

@@ -3095,9 +3095,15 @@ void mlkem_arm32_ntt(sword16* r)
"add %[r], %[r], #32\n\t"
"bne L_mlkem_arm32_ntt_loop_567_%=\n\t"
"add sp, sp, #8\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r),
[L_mlkem_arm32_ntt_zetas] "+r" (L_mlkem_arm32_ntt_zetas_c)
:
#else
:
: [r] "r" (r),
[L_mlkem_arm32_ntt_zetas] "r" (L_mlkem_arm32_ntt_zetas_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8",
"r9", "r10", "r11"
);
@@ -7536,9 +7542,15 @@ void mlkem_arm32_invntt(sword16* r)
"add %[r], %[r], #4\n\t"
"bne L_mlkem_invntt_loop_321_%=\n\t"
"add sp, sp, #8\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r),
[L_mlkem_invntt_zetas_inv] "+r" (L_mlkem_invntt_zetas_inv_c)
:
#else
:
: [r] "r" (r),
[L_mlkem_invntt_zetas_inv] "r" (L_mlkem_invntt_zetas_inv_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8",
"r9", "r10", "r11"
);
@@ -7870,9 +7882,15 @@ void mlkem_arm32_basemul_mont(sword16* r, const sword16* a, const sword16* b)
"stm %[r]!, {r4, r5}\n\t"
"pop {r8}\n\t"
"bne L_mlkem_basemul_mont_loop_%=\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b),
[L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
:
#else
:
: [r] "r" (r), [a] "r" (a), [b] "r" (b),
[L_mlkem_basemul_mont_zetas] "r" (L_mlkem_basemul_mont_zetas_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9",
"r10", "r11"
);
@@ -8204,9 +8222,15 @@ void mlkem_arm32_basemul_mont_add(sword16* r, const sword16* a,
"stm %[r]!, {r4, r5}\n\t"
"pop {r8}\n\t"
"bne L_mlkem_arm32_basemul_mont_add_loop_%=\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b),
[L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
:
#else
:
: [r] "r" (r), [a] "r" (a), [b] "r" (b),
[L_mlkem_basemul_mont_zetas] "r" (L_mlkem_basemul_mont_zetas_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9",
"r10", "r11"
);
@@ -8393,9 +8417,15 @@ void mlkem_arm32_csubq(sword16* p)
"stm %[p]!, {r2, r3, r4, r5}\n\t"
"subs r1, r1, #8\n\t"
"bne L_mlkem_arm32_csubq_loop_%=\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [p] "+r" (p),
[L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
:
#else
:
: [p] "r" (p),
[L_mlkem_basemul_mont_zetas] "r" (L_mlkem_basemul_mont_zetas_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8",
"r9", "r10", "r11"
);
@@ -8676,9 +8706,15 @@ unsigned int mlkem_arm32_rej_uniform(sword16* p, unsigned int len,
"\n"
"L_mlkem_arm32_rej_uniform_done_%=: \n\t"
"lsr r0, r12, #1\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen),
[L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
:
#else
:
: [p] "r" (p), [len] "r" (len), [r] "r" (r), [rLen] "r" (rLen),
[L_mlkem_basemul_mont_zetas] "r" (L_mlkem_basemul_mont_zetas_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8"
);
return (word32)(size_t)p;

View File

@@ -281,9 +281,15 @@ void poly1305_arm32_blocks_16(Poly1305* ctx, const byte* m, word32 len,
"\n"
"L_poly1305_arm32_16_done_%=: \n\t"
"add sp, sp, #28\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len),
[notLast] "+r" (notLast)
:
#else
:
: [ctx] "r" (ctx), [m] "r" (m), [len] "r" (len),
[notLast] "r" (notLast)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9",
"r10", "r11"
);
@@ -342,9 +348,15 @@ void poly1305_set_key(Poly1305* ctx, const byte* key)
"stm lr, {r5, r6, r7, r8, r12}\n\t"
/* Zero leftover */
"str r5, [%[ctx], #52]\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ctx] "+r" (ctx), [key] "+r" (key),
[L_poly1305_arm32_clamp] "+r" (L_poly1305_arm32_clamp_c)
:
#else
:
: [ctx] "r" (ctx), [key] "r" (key),
[L_poly1305_arm32_clamp] "r" (L_poly1305_arm32_clamp_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8"
);
}
@@ -404,8 +416,13 @@ void poly1305_final(Poly1305* ctx, byte* mac)
/* Zero out padding. */
"add r9, %[ctx], #36\n\t"
"stm r9, {r4, r5, r6, r7}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ctx] "+r" (ctx), [mac] "+r" (mac)
:
#else
:
: [ctx] "r" (ctx), [mac] "r" (mac)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8",
"r9"
);
@@ -636,9 +653,15 @@ void poly1305_arm32_blocks_16(Poly1305* ctx, const byte* m, word32 len,
"\n"
"L_poly1305_arm32_16_done_%=: \n\t"
"add sp, sp, #28\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len),
[notLast] "+r" (notLast)
:
#else
:
: [ctx] "r" (ctx), [m] "r" (m), [len] "r" (len),
[notLast] "r" (notLast)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9",
"r10", "r11"
);
@@ -1100,8 +1123,13 @@ void poly1305_arm32_blocks(Poly1305* ctx, const unsigned char* m, size_t bytes)
"stm r12, {r7, r8, r9, r10, r11}\n\t"
"\n"
"L_poly1305_arm32_blocks_done_%=: \n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ctx] "+r" (ctx), [m] "+r" (m), [bytes] "+r" (bytes)
:
#else
:
: [ctx] "r" (ctx), [m] "r" (m), [bytes] "r" (bytes)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9",
"r10", "r11", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8",
"d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18",
@@ -1340,9 +1368,15 @@ void poly1305_set_key(Poly1305* ctx, const byte* key)
"stm lr, {r4, r5, r6, r7, r8, r9}\n\t"
/* Zero leftover */
"str r5, [%[ctx], #56]\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ctx] "+r" (ctx), [key] "+r" (key),
[L_poly1305_arm32_clamp] "+r" (L_poly1305_arm32_clamp_c)
:
#else
:
: [ctx] "r" (ctx), [key] "r" (key),
[L_poly1305_arm32_clamp] "r" (L_poly1305_arm32_clamp_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9",
"r10", "r11", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8",
"d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18",
@@ -1405,8 +1439,13 @@ void poly1305_final(Poly1305* ctx, byte* mac)
/* Zero out padding. */
"add r9, %[ctx], #40\n\t"
"stm r9, {r4, r5, r6, r7}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ctx] "+r" (ctx), [mac] "+r" (mac)
:
#else
:
: [ctx] "r" (ctx), [mac] "r" (mac)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8",
"r9"
);

View File

@@ -1742,9 +1742,15 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
"add %[data], %[data], #0x40\n\t"
"bne L_SHA256_transform_len_begin_%=\n\t"
"add sp, sp, #0xc0\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len),
[L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c)
:
#else
:
: [sha256] "r" (sha256), [data] "r" (data), [len] "r" (len),
[L_SHA256_transform_len_k] "r" (L_SHA256_transform_len_k_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r12"
);
@@ -2814,9 +2820,15 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
"str r10, [sp, #8]\n\t"
"bne L_SHA256_transform_neon_len_begin_%=\n\t"
"add sp, sp, #24\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len),
[L_SHA256_transform_neon_len_k] "+r" (L_SHA256_transform_neon_len_k_c)
:
#else
:
: [sha256] "r" (sha256), [data] "r" (data), [len] "r" (len),
[L_SHA256_transform_neon_len_k] "r" (L_SHA256_transform_neon_len_k_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr",
"r10", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9",
"d10", "d11"

View File

@@ -346,9 +346,15 @@ void BlockSha3(word64* state)
"vst1.8 {d20-d23}, [%[state]]!\n\t"
"vst1.8 {d24}, [%[state]]\n\t"
"add sp, sp, #16\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [state] "+r" (state),
[L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c)
:
#else
:
: [state] "r" (state),
[L_sha3_arm2_neon_rt] "r" (L_sha3_arm2_neon_rt_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
"d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16",
"d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25",
@@ -2354,8 +2360,13 @@ void BlockSha3(word64* state)
"subs r2, r2, #1\n\t"
"bne L_sha3_arm32_begin_%=\n\t"
"add sp, sp, #0xcc\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [state] "+r" (state), [L_sha3_arm2_rt] "+r" (L_sha3_arm2_rt_c)
:
#else
:
: [state] "r" (state), [L_sha3_arm2_rt] "r" (L_sha3_arm2_rt_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8",
"r9", "r10", "r11"
);

View File

@@ -7534,9 +7534,15 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"bne L_SHA512_transform_len_begin_%=\n\t"
"eor r0, r0, r0\n\t"
"add sp, sp, #0xc0\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len),
[L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c)
:
#else
:
: [sha512] "r" (sha512), [data] "r" (data), [len] "r" (len),
[L_SHA512_transform_len_k] "r" (L_SHA512_transform_len_k_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r12"
);
@@ -9099,9 +9105,15 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"subs %[len], %[len], #0x80\n\t"
"sub r3, r3, #0x280\n\t"
"bne L_SHA512_transform_neon_len_begin_%=\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len),
[L_SHA512_transform_neon_len_k] "+r" (L_SHA512_transform_neon_len_k_c)
:
#else
:
: [sha512] "r" (sha512), [data] "r" (data), [len] "r" (len),
[L_SHA512_transform_neon_len_k] "r" (L_SHA512_transform_neon_len_k_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r12", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
"d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "q8", "q9",
"q10", "q11", "q12", "q13", "q14", "q15"