Merge pull request #10023 from SparkiDev/asm_gen_fixes_2

ASM generation fixes
This commit is contained in:
David Garske
2026-03-20 08:00:57 -07:00
committed by GitHub
22 changed files with 493 additions and 467 deletions
+2 -3
View File
@@ -1444,7 +1444,6 @@ AES_XTS_init_avx1:
.p2align 4
_AES_XTS_init_avx1:
#endif /* __APPLE__ */
movl %edx, %eax
vmovdqu (%rdi), %xmm0
# aes_enc_block
vpxor (%rsi), %xmm0, %xmm0
@@ -1466,13 +1465,13 @@ _AES_XTS_init_avx1:
vaesenc %xmm2, %xmm0, %xmm0
vmovdqu 144(%rsi), %xmm2
vaesenc %xmm2, %xmm0, %xmm0
cmpl $11, %eax
cmpl $11, %edx
vmovdqu 160(%rsi), %xmm2
jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last
vaesenc %xmm2, %xmm0, %xmm0
vmovdqu 176(%rsi), %xmm3
vaesenc %xmm3, %xmm0, %xmm0
cmpl $13, %eax
cmpl $13, %edx
vmovdqu 192(%rsi), %xmm2
jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last
vaesenc %xmm2, %xmm0, %xmm0
+2 -3
View File
@@ -1456,7 +1456,6 @@ _text ENDS
IFDEF HAVE_INTEL_AVX1
_text SEGMENT READONLY PARA
AES_XTS_init_avx1 PROC
mov eax, r8d
vmovdqu xmm0, OWORD PTR [rcx]
; aes_enc_block
vpxor xmm0, xmm0, [rdx]
@@ -1478,13 +1477,13 @@ AES_XTS_init_avx1 PROC
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rdx+144]
vaesenc xmm0, xmm0, xmm2
cmp eax, 11
cmp r8d, 11
vmovdqu xmm2, OWORD PTR [rdx+160]
jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm3, OWORD PTR [rdx+176]
vaesenc xmm0, xmm0, xmm3
cmp eax, 13
cmp r8d, 13
vmovdqu xmm2, OWORD PTR [rdx+192]
jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last
vaesenc xmm0, xmm0, xmm2
+1 -1
View File
@@ -12372,7 +12372,7 @@ _fe_cmov_table_avx2:
pushq %r15
pushq %rbx
movq %rdx, %rcx
xor %rbx, %rbx
xorq %rbx, %rbx
movsbq %cl, %rax
cdq
xorb %dl, %al
+3 -3
View File
@@ -12555,7 +12555,7 @@ L_AES_CBC_encrypt_block_nr_256:
eor r5, r5, r9
eor r6, r6, r10
eor r7, r7, r11
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
pop {r1, r2, lr}
ldr r3, [sp]
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
@@ -13269,7 +13269,7 @@ L_AES_CBC_encrypt_block_nr_192:
eor r5, r5, r9
eor r6, r6, r10
eor r7, r7, r11
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
pop {r1, r2, lr}
ldr r3, [sp]
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
@@ -13983,7 +13983,7 @@ L_AES_CBC_encrypt_block_nr_128:
eor r5, r5, r9
eor r6, r6, r10
eor r7, r7, r11
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
pop {r1, r2, lr}
ldr r3, [sp]
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
+3 -3
View File
@@ -12930,7 +12930,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in,
"eor r5, r5, r9\n\t"
"eor r6, r6, r10\n\t"
"eor r7, r7, r11\n\t"
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
"pop {r1, %[len], lr}\n\t"
"ldr %[ks], [sp]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
@@ -13647,7 +13647,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in,
"eor r5, r5, r9\n\t"
"eor r6, r6, r10\n\t"
"eor r7, r7, r11\n\t"
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
"pop {r1, %[len], lr}\n\t"
"ldr %[ks], [sp]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
@@ -14364,7 +14364,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in,
"eor r5, r5, r9\n\t"
"eor r6, r6, r10\n\t"
"eor r7, r7, r11\n\t"
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
"pop {r1, %[len], lr}\n\t"
"ldr %[ks], [sp]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
+14 -14
View File
@@ -89,14 +89,14 @@ wc_chacha_setkey:
#endif /* BIG_ENDIAN_ORDER */
stm r0!, {r4, r5, r12, lr}
# Next 16 bytes of key.
beq L_chacha_arm32_setkey_same_keyb_ytes
beq L_chacha_arm32_setkey_same_key_bytes
# Update key pointer for next 16 bytes.
add r1, r1, r2
ldr r4, [r1]
ldr r5, [r1, #4]
ldr r12, [r1, #8]
ldr lr, [r1, #12]
L_chacha_arm32_setkey_same_keyb_ytes:
L_chacha_arm32_setkey_same_key_bytes:
stm r0, {r4, r5, r12, lr}
pop {r4, r5, pc}
.size wc_chacha_setkey,.-wc_chacha_setkey
@@ -572,11 +572,11 @@ L_chacha_crypt_bytes_arm32_round_start_256:
ror r11, r11, #16
veor q7, q7, q4
add r8, r8, r10
vrev32.i16 q15, q15
vrev32.16 q15, q15
add r9, r9, r11
vrev32.i16 q3, q3
vrev32.16 q3, q3
eor r4, r4, r8
vrev32.i16 q7, q7
vrev32.16 q7, q7
eor r5, r5, r9
# c += d; b ^= c; b <<<= 12;
vadd.i32 q14, q14, q15
@@ -685,11 +685,11 @@ L_chacha_crypt_bytes_arm32_round_start_256:
ror r10, r10, #16
veor q7, q7, q4
add r8, r8, r11
vrev32.i16 q15, q15
vrev32.16 q15, q15
add r9, r9, r10
vrev32.i16 q3, q3
vrev32.16 q3, q3
eor r5, r5, r8
vrev32.i16 q7, q7
vrev32.16 q7, q7
eor r6, r6, r9
# c += d; b ^= c; b <<<= 12;
vadd.i32 q14, q14, q15
@@ -891,8 +891,8 @@ L_chacha_crypt_bytes_arm32_round_start_128:
vadd.i32 q4, q4, q5
veor q3, q3, q0
veor q7, q7, q4
vrev32.i16 q3, q3
vrev32.i16 q7, q7
vrev32.16 q3, q3
vrev32.16 q7, q7
# c += d; b ^= c; b <<<= 12;
vadd.i32 q2, q2, q3
vadd.i32 q6, q6, q7
@@ -932,8 +932,8 @@ L_chacha_crypt_bytes_arm32_round_start_128:
vadd.i32 q4, q4, q5
veor q3, q3, q0
veor q7, q7, q4
vrev32.i16 q3, q3
vrev32.i16 q7, q7
vrev32.16 q3, q3
vrev32.16 q7, q7
# c += d; b ^= c; b <<<= 12;
vadd.i32 q2, q2, q3
vadd.i32 q6, q6, q7
@@ -1178,13 +1178,13 @@ wc_chacha_setkey:
vldm r3, {q0}
vld1.8 {q1}, [r1]!
#ifdef BIG_ENDIAN_ORDER
vrev32.i16 q1, q1
vrev32.16 q1, q1
#endif /* BIG_ENDIAN_ORDER */
vstm r0!, {q0, q1}
beq L_chacha_setkey_arm32_done
vld1.8 {q1}, [r1]
#ifdef BIG_ENDIAN_ORDER
vrev32.i16 q1, q1
vrev32.16 q1, q1
#endif /* BIG_ENDIAN_ORDER */
L_chacha_setkey_arm32_done:
vstm r0, {q1}
+14 -14
View File
@@ -134,7 +134,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key,
#endif /* BIG_ENDIAN_ORDER */
"stm %[x]!, {r4, r5, r12, lr}\n\t"
/* Next 16 bytes of key. */
"beq L_chacha_arm32_setkey_same_keyb_ytes_%=\n\t"
"beq L_chacha_arm32_setkey_same_key_bytes_%=\n\t"
/* Update key pointer for next 16 bytes. */
"add %[key], %[key], %[keySz]\n\t"
"ldr r4, [%[key]]\n\t"
@@ -142,7 +142,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key,
"ldr r12, [%[key], #8]\n\t"
"ldr lr, [%[key], #12]\n\t"
"\n"
"L_chacha_arm32_setkey_same_keyb_ytes_%=: \n\t"
"L_chacha_arm32_setkey_same_key_bytes_%=: \n\t"
"stm %[x], {r4, r5, r12, lr}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz),
@@ -693,11 +693,11 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"ror r11, r11, #16\n\t"
"veor q7, q7, q4\n\t"
"add r8, r8, r10\n\t"
"vrev32.i16 q15, q15\n\t"
"vrev32.16 q15, q15\n\t"
"add r9, r9, r11\n\t"
"vrev32.i16 q3, q3\n\t"
"vrev32.16 q3, q3\n\t"
"eor r4, r4, r8\n\t"
"vrev32.i16 q7, q7\n\t"
"vrev32.16 q7, q7\n\t"
"eor r5, r5, r9\n\t"
/* c += d; b ^= c; b <<<= 12; */
"vadd.i32 q14, q14, q15\n\t"
@@ -806,11 +806,11 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"ror r10, r10, #16\n\t"
"veor q7, q7, q4\n\t"
"add r8, r8, r11\n\t"
"vrev32.i16 q15, q15\n\t"
"vrev32.16 q15, q15\n\t"
"add r9, r9, r10\n\t"
"vrev32.i16 q3, q3\n\t"
"vrev32.16 q3, q3\n\t"
"eor r5, r5, r8\n\t"
"vrev32.i16 q7, q7\n\t"
"vrev32.16 q7, q7\n\t"
"eor r6, r6, r9\n\t"
/* c += d; b ^= c; b <<<= 12; */
"vadd.i32 q14, q14, q15\n\t"
@@ -1014,8 +1014,8 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"vadd.i32 q4, q4, q5\n\t"
"veor q3, q3, q0\n\t"
"veor q7, q7, q4\n\t"
"vrev32.i16 q3, q3\n\t"
"vrev32.i16 q7, q7\n\t"
"vrev32.16 q3, q3\n\t"
"vrev32.16 q7, q7\n\t"
/* c += d; b ^= c; b <<<= 12; */
"vadd.i32 q2, q2, q3\n\t"
"vadd.i32 q6, q6, q7\n\t"
@@ -1055,8 +1055,8 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"vadd.i32 q4, q4, q5\n\t"
"veor q3, q3, q0\n\t"
"veor q7, q7, q4\n\t"
"vrev32.i16 q3, q3\n\t"
"vrev32.i16 q7, q7\n\t"
"vrev32.16 q3, q3\n\t"
"vrev32.16 q7, q7\n\t"
/* c += d; b ^= c; b <<<= 12; */
"vadd.i32 q2, q2, q3\n\t"
"vadd.i32 q6, q6, q7\n\t"
@@ -1329,13 +1329,13 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key,
"vldm r3, {q0}\n\t"
"vld1.8 {q1}, [%[key]]!\n\t"
#ifdef BIG_ENDIAN_ORDER
"vrev32.i16 q1, q1\n\t"
"vrev32.16 q1, q1\n\t"
#endif /* BIG_ENDIAN_ORDER */
"vstm %[x]!, {q0-q1}\n\t"
"beq L_chacha_setkey_arm32_done_%=\n\t"
"vld1.8 {q1}, [%[key]]\n\t"
#ifdef BIG_ENDIAN_ORDER
"vrev32.i16 q1, q1\n\t"
"vrev32.16 q1, q1\n\t"
#endif /* BIG_ENDIAN_ORDER */
"\n"
"L_chacha_setkey_arm32_done_%=: \n\t"
+92 -92
View File
@@ -185,7 +185,7 @@ mlkem_arm32_ntt:
#else
movt r10, #0xcff
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
mov r2, #16
L_mlkem_arm32_ntt_loop_123:
str r2, [sp]
@@ -288,7 +288,7 @@ L_mlkem_arm32_ntt_loop_123:
#else
bfi r2, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r7
smulbt r7, r11, r7
@@ -379,7 +379,7 @@ L_mlkem_arm32_ntt_loop_123:
#else
bfi r3, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r8
smulbt r8, r11, r8
@@ -470,7 +470,7 @@ L_mlkem_arm32_ntt_loop_123:
#else
bfi r4, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r9
smulbt r9, r11, r9
@@ -561,7 +561,7 @@ L_mlkem_arm32_ntt_loop_123:
#else
bfi r5, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
ldr r11, [r1, #4]
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r4
@@ -653,7 +653,7 @@ L_mlkem_arm32_ntt_loop_123:
#else
bfi r2, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r5
smulbt r5, r11, r5
@@ -744,7 +744,7 @@ L_mlkem_arm32_ntt_loop_123:
#else
bfi r3, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smultb r12, r11, r8
smultt r8, r11, r8
@@ -834,7 +834,7 @@ L_mlkem_arm32_ntt_loop_123:
#else
bfi r6, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smultb r12, r11, r9
smultt r9, r11, r9
@@ -924,7 +924,7 @@ L_mlkem_arm32_ntt_loop_123:
#else
bfi r7, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
ldr r11, [r1, #8]
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r3
@@ -1016,7 +1016,7 @@ L_mlkem_arm32_ntt_loop_123:
#else
bfi r2, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smultb r12, r11, r5
smultt r5, r11, r5
@@ -1106,7 +1106,7 @@ L_mlkem_arm32_ntt_loop_123:
#else
bfi r4, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
ldr r11, [r1, #12]
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r7
@@ -1198,7 +1198,7 @@ L_mlkem_arm32_ntt_loop_123:
#else
bfi r6, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smultb r12, r11, r9
smultt r9, r11, r9
@@ -1288,7 +1288,7 @@ L_mlkem_arm32_ntt_loop_123:
#else
bfi r8, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
str r2, [r0]
str r3, [r0, #64]
str r4, [r0, #128]
@@ -1408,7 +1408,7 @@ L_mlkem_arm32_ntt_loop_4_i:
#else
bfi r2, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r5
smulbt r5, r11, r5
@@ -1499,7 +1499,7 @@ L_mlkem_arm32_ntt_loop_4_i:
#else
bfi r3, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smultb r12, r11, r8
smultt r8, r11, r8
@@ -1589,7 +1589,7 @@ L_mlkem_arm32_ntt_loop_4_i:
#else
bfi r6, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smultb r12, r11, r9
smultt r9, r11, r9
@@ -1679,7 +1679,7 @@ L_mlkem_arm32_ntt_loop_4_i:
#else
bfi r7, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
str r2, [r0]
str r3, [r0, #16]
str r4, [r0, #32]
@@ -1804,7 +1804,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r2, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r7
smulbt r7, r11, r7
@@ -1895,7 +1895,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r3, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r8
smulbt r8, r11, r8
@@ -1986,7 +1986,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r4, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r9
smulbt r9, r11, r9
@@ -2077,7 +2077,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r5, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
ldr r11, [sp, #4]
add r11, r1, r11, lsr #2
ldr r11, [r11, #64]
@@ -2171,7 +2171,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r2, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r5
smulbt r5, r11, r5
@@ -2262,7 +2262,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r3, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smultb r12, r11, r8
smultt r8, r11, r8
@@ -2352,7 +2352,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r6, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smultb r12, r11, r9
smultt r9, r11, r9
@@ -2442,7 +2442,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r7, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
ldr r11, [sp, #4]
add r11, r1, r11, lsr #1
ldr r11, [r11, #128]
@@ -2536,7 +2536,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r2, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smultb r12, r11, r5
smultt r5, r11, r5
@@ -2626,7 +2626,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r4, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
ldr r11, [sp, #4]
add r11, r1, r11, lsr #1
ldr r11, [r11, #132]
@@ -2720,7 +2720,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r6, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smultb r12, r11, r9
smultt r9, r11, r9
@@ -2810,7 +2810,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r8, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
mov r11, #0xc0
@@ -2836,7 +2836,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
mov r10, #0xd01
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r2
smulwt lr, r11, r2
@@ -2872,7 +2872,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r2, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r3
smulwt lr, r11, r3
@@ -2908,7 +2908,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r3, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r4
smulwt lr, r11, r4
@@ -2944,7 +2944,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r4, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r5
smulwt lr, r11, r5
@@ -2980,7 +2980,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r5, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r6
smulwt lr, r11, r6
@@ -3016,7 +3016,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r6, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r7
smulwt lr, r11, r7
@@ -3052,7 +3052,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r7, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r8
smulwt lr, r11, r8
@@ -3088,7 +3088,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r8, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r9
smulwt lr, r11, r9
@@ -3124,7 +3124,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
bfi r9, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
mov r10, #0x1
@@ -3138,7 +3138,7 @@ L_mlkem_arm32_ntt_loop_567:
#else
movt r10, #0xcff
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
str r2, [r0]
str r3, [r0, #4]
str r4, [r0, #8]
@@ -3309,7 +3309,7 @@ mlkem_arm32_invntt:
#else
movt r10, #0xcff
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
mov r3, #0
L_mlkem_invntt_loop_765:
add r11, r1, r3, lsr #1
@@ -3439,7 +3439,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r3, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r4, r5
sadd16 r4, r4, r5
@@ -3555,7 +3555,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r5, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
ldr r11, [sp, #4]
add r11, r1, r11, lsr #1
ldr r11, [r11, #4]
@@ -3675,7 +3675,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r7, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r8, r9
sadd16 r8, r8, r9
@@ -3791,7 +3791,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r9, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
ldr r11, [sp, #4]
add r11, r1, r11, lsr #2
ldr r11, [r11, #128]
@@ -3911,7 +3911,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r4, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r3, r5
sadd16 r3, r3, r5
@@ -4028,7 +4028,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r5, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r6, r8
sadd16 r6, r6, r8
@@ -4144,7 +4144,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r8, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r7, r9
sadd16 r7, r7, r9
@@ -4260,7 +4260,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r9, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
ldr r11, [sp, #4]
add r11, r1, r11, lsr #3
ldr r11, [r11, #192]
@@ -4380,7 +4380,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r6, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r3, r7
sadd16 r3, r3, r7
@@ -4497,7 +4497,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r7, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r4, r8
sadd16 r4, r4, r8
@@ -4614,7 +4614,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r8, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r5, r9
sadd16 r5, r5, r9
@@ -4731,7 +4731,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r9, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
mov r11, #0xc0
@@ -4751,7 +4751,7 @@ L_mlkem_invntt_loop_765:
#else
mov r11, #0x4ebf
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r2
smulwt lr, r11, r2
@@ -4787,7 +4787,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r2, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r3
smulwt lr, r11, r3
@@ -4823,7 +4823,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r3, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r4
smulwt lr, r11, r4
@@ -4859,7 +4859,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r4, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r5
smulwt lr, r11, r5
@@ -4895,7 +4895,7 @@ L_mlkem_invntt_loop_765:
#else
bfi r5, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
str r2, [r0]
str r3, [r0, #4]
str r4, [r0, #8]
@@ -5042,7 +5042,7 @@ L_mlkem_invntt_loop_4_i:
#else
bfi r4, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r3, r5
sadd16 r3, r3, r5
@@ -5159,7 +5159,7 @@ L_mlkem_invntt_loop_4_i:
#else
bfi r5, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r6, r8
sadd16 r6, r6, r8
@@ -5275,7 +5275,7 @@ L_mlkem_invntt_loop_4_i:
#else
bfi r8, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r7, r9
sadd16 r7, r7, r9
@@ -5391,7 +5391,7 @@ L_mlkem_invntt_loop_4_i:
#else
bfi r9, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
str r2, [r0]
str r3, [r0, #16]
str r4, [r0, #32]
@@ -5542,7 +5542,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r3, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r4, r5
sadd16 r4, r4, r5
@@ -5658,7 +5658,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r5, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
ldr r11, [r1, #244]
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r6, r7
@@ -5776,7 +5776,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r7, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r8, r9
sadd16 r8, r8, r9
@@ -5892,7 +5892,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r9, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
ldr r11, [r1, #248]
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r2, r4
@@ -6010,7 +6010,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r4, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r3, r5
sadd16 r3, r3, r5
@@ -6127,7 +6127,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r5, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r6, r8
sadd16 r6, r6, r8
@@ -6243,7 +6243,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r8, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r7, r9
sadd16 r7, r7, r9
@@ -6359,7 +6359,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r9, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
mov r11, #0xc0
@@ -6379,7 +6379,7 @@ L_mlkem_invntt_loop_321:
#else
mov r11, #0x4ebf
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r2
smulwt lr, r11, r2
@@ -6415,7 +6415,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r2, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r3
smulwt lr, r11, r3
@@ -6451,7 +6451,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r3, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r4
smulwt lr, r11, r4
@@ -6487,7 +6487,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r4, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulwb r12, r11, r5
smulwt lr, r11, r5
@@ -6523,7 +6523,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r5, lr, #16, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
ldr r11, [r1, #252]
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r2, r6
@@ -6641,7 +6641,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r6, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r3, r7
sadd16 r3, r3, r7
@@ -6758,7 +6758,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r7, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r4, r8
sadd16 r4, r4, r8
@@ -6875,7 +6875,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r8, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
ssub16 r12, r5, r9
sadd16 r5, r5, r9
@@ -6992,7 +6992,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r9, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
ldr r11, [r1, #254]
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r2
@@ -7076,7 +7076,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r2, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r3
smulbt r3, r11, r3
@@ -7159,7 +7159,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r3, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r4
smulbt r4, r11, r4
@@ -7242,7 +7242,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r4, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r5
smulbt r5, r11, r5
@@ -7325,7 +7325,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r5, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r6
smulbt r6, r11, r6
@@ -7408,7 +7408,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r6, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r7
smulbt r7, r11, r7
@@ -7491,7 +7491,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r7, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r8
smulbt r8, r11, r8
@@ -7574,7 +7574,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r8, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
smulbb r12, r11, r9
smulbt r9, r11, r9
@@ -7657,7 +7657,7 @@ L_mlkem_invntt_loop_321:
#else
bfi r9, r12, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
str r2, [r0]
str r3, [r0, #64]
str r4, [r0, #128]
@@ -7827,7 +7827,7 @@ mlkem_arm32_basemul_mont:
#else
movt r12, #0xcff
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
mov r8, #0
L_mlkem_basemul_mont_loop:
ldm r1!, {r4, r5}
@@ -8078,7 +8078,7 @@ L_mlkem_basemul_mont_loop:
#endif
orr r4, r9, r8, lsr #16
orr r5, r11, r10, lsr #16
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
stm r0!, {r4, r5}
pop {r8}
bne L_mlkem_basemul_mont_loop
@@ -8105,7 +8105,7 @@ mlkem_arm32_basemul_mont_add:
#else
movt r12, #0xcff
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
mov r8, #0
L_mlkem_arm32_basemul_mont_add_loop:
ldm r1!, {r4, r5}
@@ -8390,7 +8390,7 @@ L_mlkem_arm32_basemul_mont_add_loop:
#else
bfi r5, r10, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
stm r0!, {r4, r5}
pop {r8}
bne L_mlkem_arm32_basemul_mont_add_loop
@@ -8421,7 +8421,7 @@ mlkem_arm32_csubq:
#else
movt lr, #0xd01
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
mov r11, #0x8000
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
orr r11, r11, #0x80000000
@@ -8561,7 +8561,7 @@ L_mlkem_arm32_csubq_loop:
#else
bfi r5, r10, #0, #16
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
stm r0!, {r2, r3, r4, r5}
subs r1, r1, #8
bne L_mlkem_arm32_csubq_loop
+92 -92
View File
@@ -118,7 +118,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"movt r10, #0xcff\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"mov r2, #16\n\t"
"\n"
"L_mlkem_arm32_ntt_loop_123_%=: \n\t"
@@ -222,7 +222,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r2, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r7\n\t"
"smulbt r7, r11, r7\n\t"
@@ -313,7 +313,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r3, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r8\n\t"
"smulbt r8, r11, r8\n\t"
@@ -404,7 +404,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r4, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r9\n\t"
"smulbt r9, r11, r9\n\t"
@@ -495,7 +495,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r5, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"ldr r11, [r1, #4]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r4\n\t"
@@ -587,7 +587,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r2, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r5\n\t"
"smulbt r5, r11, r5\n\t"
@@ -678,7 +678,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r3, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smultb r12, r11, r8\n\t"
"smultt r8, r11, r8\n\t"
@@ -768,7 +768,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r6, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smultb r12, r11, r9\n\t"
"smultt r9, r11, r9\n\t"
@@ -858,7 +858,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r7, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"ldr r11, [r1, #8]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r3\n\t"
@@ -950,7 +950,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r2, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smultb r12, r11, r5\n\t"
"smultt r5, r11, r5\n\t"
@@ -1040,7 +1040,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r4, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"ldr r11, [r1, #12]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r7\n\t"
@@ -1132,7 +1132,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r6, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smultb r12, r11, r9\n\t"
"smultt r9, r11, r9\n\t"
@@ -1222,7 +1222,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r8, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"str r2, [%[r]]\n\t"
"str r3, [%[r], #64]\n\t"
"str r4, [%[r], #128]\n\t"
@@ -1344,7 +1344,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r2, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r5\n\t"
"smulbt r5, r11, r5\n\t"
@@ -1435,7 +1435,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r3, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smultb r12, r11, r8\n\t"
"smultt r8, r11, r8\n\t"
@@ -1525,7 +1525,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r6, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smultb r12, r11, r9\n\t"
"smultt r9, r11, r9\n\t"
@@ -1615,7 +1615,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r7, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"str r2, [%[r]]\n\t"
"str r3, [%[r], #16]\n\t"
"str r4, [%[r], #32]\n\t"
@@ -1741,7 +1741,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r2, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r7\n\t"
"smulbt r7, r11, r7\n\t"
@@ -1832,7 +1832,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r3, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r8\n\t"
"smulbt r8, r11, r8\n\t"
@@ -1923,7 +1923,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r4, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r9\n\t"
"smulbt r9, r11, r9\n\t"
@@ -2014,7 +2014,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r5, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"ldr r11, [sp, #4]\n\t"
"add r11, r1, r11, lsr #2\n\t"
"ldr r11, [r11, #64]\n\t"
@@ -2108,7 +2108,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r2, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r5\n\t"
"smulbt r5, r11, r5\n\t"
@@ -2199,7 +2199,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r3, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smultb r12, r11, r8\n\t"
"smultt r8, r11, r8\n\t"
@@ -2289,7 +2289,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r6, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smultb r12, r11, r9\n\t"
"smultt r9, r11, r9\n\t"
@@ -2379,7 +2379,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r7, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"ldr r11, [sp, #4]\n\t"
"add r11, r1, r11, lsr #1\n\t"
"ldr r11, [r11, #128]\n\t"
@@ -2473,7 +2473,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r2, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smultb r12, r11, r5\n\t"
"smultt r5, r11, r5\n\t"
@@ -2563,7 +2563,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r4, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"ldr r11, [sp, #4]\n\t"
"add r11, r1, r11, lsr #1\n\t"
"ldr r11, [r11, #132]\n\t"
@@ -2657,7 +2657,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r6, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smultb r12, r11, r9\n\t"
"smultt r9, r11, r9\n\t"
@@ -2747,7 +2747,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r8, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"mov r11, #0xc0\n\t"
@@ -2773,7 +2773,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"mov r10, #0xd01\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r2\n\t"
"smulwt lr, r11, r2\n\t"
@@ -2809,7 +2809,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r2, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r3\n\t"
"smulwt lr, r11, r3\n\t"
@@ -2845,7 +2845,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r3, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r4\n\t"
"smulwt lr, r11, r4\n\t"
@@ -2881,7 +2881,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r4, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r5\n\t"
"smulwt lr, r11, r5\n\t"
@@ -2917,7 +2917,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r5, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r6\n\t"
"smulwt lr, r11, r6\n\t"
@@ -2953,7 +2953,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r6, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r7\n\t"
"smulwt lr, r11, r7\n\t"
@@ -2989,7 +2989,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r7, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r8\n\t"
"smulwt lr, r11, r8\n\t"
@@ -3025,7 +3025,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r8, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r9\n\t"
"smulwt lr, r11, r9\n\t"
@@ -3061,7 +3061,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"bfi r9, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"mov r10, #0x1\n\t"
@@ -3075,7 +3075,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r)
#else
"movt r10, #0xcff\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"str r2, [%[r]]\n\t"
"str r3, [%[r], #4]\n\t"
"str r4, [%[r], #8]\n\t"
@@ -3170,7 +3170,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"movt r10, #0xcff\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"mov r3, #0\n\t"
"\n"
"L_mlkem_invntt_loop_765_%=: \n\t"
@@ -3301,7 +3301,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r3, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r4, r5\n\t"
"sadd16 r4, r4, r5\n\t"
@@ -3417,7 +3417,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r5, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"ldr r11, [sp, #4]\n\t"
"add r11, r1, r11, lsr #1\n\t"
"ldr r11, [r11, #4]\n\t"
@@ -3537,7 +3537,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r7, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r8, r9\n\t"
"sadd16 r8, r8, r9\n\t"
@@ -3653,7 +3653,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r9, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"ldr r11, [sp, #4]\n\t"
"add r11, r1, r11, lsr #2\n\t"
"ldr r11, [r11, #128]\n\t"
@@ -3773,7 +3773,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r4, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r3, r5\n\t"
"sadd16 r3, r3, r5\n\t"
@@ -3890,7 +3890,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r5, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r6, r8\n\t"
"sadd16 r6, r6, r8\n\t"
@@ -4006,7 +4006,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r8, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r7, r9\n\t"
"sadd16 r7, r7, r9\n\t"
@@ -4122,7 +4122,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r9, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"ldr r11, [sp, #4]\n\t"
"add r11, r1, r11, lsr #3\n\t"
"ldr r11, [r11, #192]\n\t"
@@ -4242,7 +4242,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r6, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r3, r7\n\t"
"sadd16 r3, r3, r7\n\t"
@@ -4359,7 +4359,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r7, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r4, r8\n\t"
"sadd16 r4, r4, r8\n\t"
@@ -4476,7 +4476,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r8, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r5, r9\n\t"
"sadd16 r5, r5, r9\n\t"
@@ -4593,7 +4593,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r9, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"mov r11, #0xc0\n\t"
@@ -4613,7 +4613,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"mov r11, #0x4ebf\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r2\n\t"
"smulwt lr, r11, r2\n\t"
@@ -4649,7 +4649,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r2, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r3\n\t"
"smulwt lr, r11, r3\n\t"
@@ -4685,7 +4685,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r3, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r4\n\t"
"smulwt lr, r11, r4\n\t"
@@ -4721,7 +4721,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r4, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r5\n\t"
"smulwt lr, r11, r5\n\t"
@@ -4757,7 +4757,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r5, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"str r2, [%[r]]\n\t"
"str r3, [%[r], #4]\n\t"
"str r4, [%[r], #8]\n\t"
@@ -4906,7 +4906,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r4, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r3, r5\n\t"
"sadd16 r3, r3, r5\n\t"
@@ -5023,7 +5023,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r5, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r6, r8\n\t"
"sadd16 r6, r6, r8\n\t"
@@ -5139,7 +5139,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r8, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r7, r9\n\t"
"sadd16 r7, r7, r9\n\t"
@@ -5255,7 +5255,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r9, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"str r2, [%[r]]\n\t"
"str r3, [%[r], #16]\n\t"
"str r4, [%[r], #32]\n\t"
@@ -5407,7 +5407,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r3, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r4, r5\n\t"
"sadd16 r4, r4, r5\n\t"
@@ -5523,7 +5523,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r5, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"ldr r11, [r1, #244]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r6, r7\n\t"
@@ -5641,7 +5641,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r7, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r8, r9\n\t"
"sadd16 r8, r8, r9\n\t"
@@ -5757,7 +5757,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r9, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"ldr r11, [r1, #248]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r2, r4\n\t"
@@ -5875,7 +5875,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r4, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r3, r5\n\t"
"sadd16 r3, r3, r5\n\t"
@@ -5992,7 +5992,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r5, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r6, r8\n\t"
"sadd16 r6, r6, r8\n\t"
@@ -6108,7 +6108,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r8, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r7, r9\n\t"
"sadd16 r7, r7, r9\n\t"
@@ -6224,7 +6224,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r9, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"mov r11, #0xc0\n\t"
@@ -6244,7 +6244,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"mov r11, #0x4ebf\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r2\n\t"
"smulwt lr, r11, r2\n\t"
@@ -6280,7 +6280,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r2, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r3\n\t"
"smulwt lr, r11, r3\n\t"
@@ -6316,7 +6316,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r3, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r4\n\t"
"smulwt lr, r11, r4\n\t"
@@ -6352,7 +6352,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r4, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulwb r12, r11, r5\n\t"
"smulwt lr, r11, r5\n\t"
@@ -6388,7 +6388,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r5, lr, #16, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"ldr r11, [r1, #252]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r2, r6\n\t"
@@ -6506,7 +6506,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r6, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r3, r7\n\t"
"sadd16 r3, r3, r7\n\t"
@@ -6623,7 +6623,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r7, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r4, r8\n\t"
"sadd16 r4, r4, r8\n\t"
@@ -6740,7 +6740,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r8, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"ssub16 r12, r5, r9\n\t"
"sadd16 r5, r5, r9\n\t"
@@ -6857,7 +6857,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r9, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"ldr r11, [r1, #254]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r2\n\t"
@@ -6941,7 +6941,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r2, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r3\n\t"
"smulbt r3, r11, r3\n\t"
@@ -7024,7 +7024,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r3, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r4\n\t"
"smulbt r4, r11, r4\n\t"
@@ -7107,7 +7107,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r4, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r5\n\t"
"smulbt r5, r11, r5\n\t"
@@ -7190,7 +7190,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r5, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r6\n\t"
"smulbt r6, r11, r6\n\t"
@@ -7273,7 +7273,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r6, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r7\n\t"
"smulbt r7, r11, r7\n\t"
@@ -7356,7 +7356,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r7, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r8\n\t"
"smulbt r8, r11, r8\n\t"
@@ -7439,7 +7439,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r8, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
"smulbb r12, r11, r9\n\t"
"smulbt r9, r11, r9\n\t"
@@ -7522,7 +7522,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r)
#else
"bfi r9, r12, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"str r2, [%[r]]\n\t"
"str r3, [%[r], #64]\n\t"
"str r4, [%[r], #128]\n\t"
@@ -7620,7 +7620,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont(sword16* r,
#else
"movt r12, #0xcff\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"mov r8, #0\n\t"
"\n"
"L_mlkem_basemul_mont_loop_%=: \n\t"
@@ -7872,7 +7872,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont(sword16* r,
#endif
"orr r4, r9, r8, lsr #16\n\t"
"orr r5, r11, r10, lsr #16\n\t"
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"stm %[r]!, {r4, r5}\n\t"
"pop {r8}\n\t"
"bne L_mlkem_basemul_mont_loop_%=\n\t"
@@ -7925,7 +7925,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont_add(sword16* r,
#else
"movt r12, #0xcff\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"mov r8, #0\n\t"
"\n"
"L_mlkem_arm32_basemul_mont_add_loop_%=: \n\t"
@@ -8211,7 +8211,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont_add(sword16* r,
#else
"bfi r5, r10, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"stm %[r]!, {r4, r5}\n\t"
"pop {r8}\n\t"
"bne L_mlkem_arm32_basemul_mont_add_loop_%=\n\t"
@@ -8264,7 +8264,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_csubq(sword16* p)
#else
"movt lr, #0xd01\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"mov r11, #0x8000\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"orr r11, r11, #0x80000000\n\t"
@@ -8405,7 +8405,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_csubq(sword16* p)
#else
"bfi r5, r10, #0, #16\n\t"
#endif
#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
"stm %[p]!, {r2, r3, r4, r5}\n\t"
"subs r1, r1, #8\n\t"
"bne L_mlkem_arm32_csubq_loop_%=\n\t"
+8 -8
View File
@@ -33,10 +33,10 @@
#ifdef WOLFSSL_SHA3
#ifndef WOLFSSL_ARMASM_NO_NEON
.text
.type L_sha3_arm2_neon_rt, %object
.size L_sha3_arm2_neon_rt, 192
.type L_sha3_arm32_neon_rt, %object
.size L_sha3_arm32_neon_rt, 192
.align 4
L_sha3_arm2_neon_rt:
L_sha3_arm32_neon_rt:
.word 0x1
.word 0x0
.word 0x8082
@@ -92,7 +92,7 @@ L_sha3_arm2_neon_rt:
BlockSha3:
vpush {d8-d15}
sub sp, sp, #16
adr r1, L_sha3_arm2_neon_rt
adr r1, L_sha3_arm32_neon_rt
mov r2, #24
mov r3, sp
vld1.8 {d0-d3}, [r0]!
@@ -354,10 +354,10 @@ L_sha3_arm32_neon_begin:
#endif /* WOLFSSL_ARMASM_NO_NEON */
#ifdef WOLFSSL_ARMASM_NO_NEON
.text
.type L_sha3_arm2_rt, %object
.size L_sha3_arm2_rt, 192
.type L_sha3_arm32_rt, %object
.size L_sha3_arm32_rt, 192
.align 4
L_sha3_arm2_rt:
L_sha3_arm32_rt:
.word 0x1
.word 0x0
.word 0x8082
@@ -413,7 +413,7 @@ L_sha3_arm2_rt:
BlockSha3:
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
sub sp, sp, #0xcc
adr r1, L_sha3_arm2_rt
adr r1, L_sha3_arm32_rt
mov r2, #12
L_sha3_arm32_begin:
str r2, [sp, #200]
+13 -13
View File
@@ -51,7 +51,7 @@
#ifdef WOLFSSL_SHA3
#ifndef WOLFSSL_ARMASM_NO_NEON
static const word64 L_sha3_arm2_neon_rt[] = {
static const word64 L_sha3_arm32_neon_rt[] = {
0x0000000000000001UL, 0x0000000000008082UL,
0x800000000000808aUL, 0x8000000080008000UL,
0x000000000000808bUL, 0x0000000080000001UL,
@@ -76,15 +76,15 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state)
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
register word64* state asm ("r0") = (word64*)state_p;
register word64* L_sha3_arm2_neon_rt_c asm ("r1") =
(word64*)&L_sha3_arm2_neon_rt;
register word64* L_sha3_arm32_neon_rt_c asm ("r1") =
(word64*)&L_sha3_arm32_neon_rt;
#else
register word64* L_sha3_arm2_neon_rt_c = (word64*)&L_sha3_arm2_neon_rt;
register word64* L_sha3_arm32_neon_rt_c = (word64*)&L_sha3_arm32_neon_rt;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"sub sp, sp, #16\n\t"
"mov r1, %[L_sha3_arm2_neon_rt]\n\t"
"mov r1, %[L_sha3_arm32_neon_rt]\n\t"
"mov r2, #24\n\t"
"mov r3, sp\n\t"
"vld1.8 {d0-d3}, [%[state]]!\n\t"
@@ -343,12 +343,12 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state)
"add sp, sp, #16\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [state] "+r" (state),
[L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c)
[L_sha3_arm32_neon_rt] "+r" (L_sha3_arm32_neon_rt_c)
:
#else
:
: [state] "r" (state),
[L_sha3_arm2_neon_rt] "r" (L_sha3_arm2_neon_rt_c)
[L_sha3_arm32_neon_rt] "r" (L_sha3_arm32_neon_rt_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
"d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16",
@@ -359,7 +359,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state)
#endif /* WOLFSSL_ARMASM_NO_NEON */
#ifdef WOLFSSL_ARMASM_NO_NEON
static const word64 L_sha3_arm2_rt[] = {
static const word64 L_sha3_arm32_rt[] = {
0x0000000000000001UL, 0x0000000000008082UL,
0x800000000000808aUL, 0x8000000080008000UL,
0x000000000000808bUL, 0x0000000080000001UL,
@@ -384,14 +384,14 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state)
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
register word64* state asm ("r0") = (word64*)state_p;
register word64* L_sha3_arm2_rt_c asm ("r1") = (word64*)&L_sha3_arm2_rt;
register word64* L_sha3_arm32_rt_c asm ("r1") = (word64*)&L_sha3_arm32_rt;
#else
register word64* L_sha3_arm2_rt_c = (word64*)&L_sha3_arm2_rt;
register word64* L_sha3_arm32_rt_c = (word64*)&L_sha3_arm32_rt;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"sub sp, sp, #0xcc\n\t"
"mov r1, %[L_sha3_arm2_rt]\n\t"
"mov r1, %[L_sha3_arm32_rt]\n\t"
"mov r2, #12\n\t"
"\n"
"L_sha3_arm32_begin_%=: \n\t"
@@ -2355,11 +2355,11 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state)
"bne L_sha3_arm32_begin_%=\n\t"
"add sp, sp, #0xcc\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [state] "+r" (state), [L_sha3_arm2_rt] "+r" (L_sha3_arm2_rt_c)
: [state] "+r" (state), [L_sha3_arm32_rt] "+r" (L_sha3_arm32_rt_c)
:
#else
:
: [state] "r" (state), [L_sha3_arm2_rt] "r" (L_sha3_arm2_rt_c)
: [state] "r" (state), [L_sha3_arm32_rt] "r" (L_sha3_arm32_rt_c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8",
"r9", "r10", "r11"
+2 -2
View File
@@ -961,14 +961,14 @@ L_chacha_crypt_bytes_arm64_lt_16:
beq L_chacha_crypt_bytes_arm64_done
L_chacha_crypt_bytes_arm64_lt_8:
mov x5, v0.d[0]
L_chacha_crypt_bytes_arm64loop_lt_8:
L_chacha_crypt_bytes_arm64_loop_lt_8:
# Encipher 1 byte at a time
ldrb w6, [x2], #1
eor w6, w6, w5
strb w6, [x1], #1
subs x3, x3, #1
lsr x5, x5, #8
bgt L_chacha_crypt_bytes_arm64loop_lt_8
bgt L_chacha_crypt_bytes_arm64_loop_lt_8
L_chacha_crypt_bytes_arm64_done:
L_chacha_crypt_bytes_arm64_done_all:
st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x0]
+2 -2
View File
@@ -918,14 +918,14 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len)
"L_chacha_crypt_bytes_arm64_lt_8_%=: \n\t"
"mov %[rol8], v0.d[0]\n\t"
"\n"
"L_chacha_crypt_bytes_arm64loop_lt_8_%=: \n\t"
"L_chacha_crypt_bytes_arm64_loop_lt_8_%=: \n\t"
/* Encipher 1 byte at a time */
"ldrb %w[ctr], [%x[m]], #1\n\t"
"eor %w[ctr], %w[ctr], %w[rol8]\n\t"
"strb %w[ctr], [%x[c]], #1\n\t"
"subs %w[len], %w[len], #1\n\t"
"lsr %[rol8], %[rol8], #8\n\t"
"b.gt L_chacha_crypt_bytes_arm64loop_lt_8_%=\n\t"
"b.gt L_chacha_crypt_bytes_arm64_loop_lt_8_%=\n\t"
"\n"
"L_chacha_crypt_bytes_arm64_done_%=: \n\t"
"\n"
+3 -3
View File
@@ -2188,7 +2188,7 @@ L_AES_CBC_encrypt_block_nr_256:
EOR r5, r5, r9
EOR r6, r6, r10
EOR r7, r7, r11
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
POP {r1, r2, lr}
LDR r3, [sp]
REV r4, r4
@@ -2444,7 +2444,7 @@ L_AES_CBC_encrypt_block_nr_192:
EOR r5, r5, r9
EOR r6, r6, r10
EOR r7, r7, r11
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
POP {r1, r2, lr}
LDR r3, [sp]
REV r4, r4
@@ -2700,7 +2700,7 @@ L_AES_CBC_encrypt_block_nr_128:
EOR r5, r5, r9
EOR r6, r6, r10
EOR r7, r7, r11
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
POP {r1, r2, lr}
LDR r3, [sp]
REV r4, r4
+3 -3
View File
@@ -2062,7 +2062,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in,
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r10\n\t"
"EOR r7, r7, r11\n\t"
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
"POP {r1, %[len], lr}\n\t"
"LDR %[ks], [sp]\n\t"
"REV r4, r4\n\t"
@@ -2339,7 +2339,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in,
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r10\n\t"
"EOR r7, r7, r11\n\t"
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
"POP {r1, %[len], lr}\n\t"
"LDR %[ks], [sp]\n\t"
"REV r4, r4\n\t"
@@ -2616,7 +2616,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in,
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r10\n\t"
"EOR r7, r7, r11\n\t"
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
"POP {r1, %[len], lr}\n\t"
"LDR %[ks], [sp]\n\t"
"REV r4, r4\n\t"
+3 -3
View File
@@ -92,9 +92,9 @@ wc_chacha_setkey:
STM r0!, {r3, r4, r5, r6}
/* Next 16 bytes of key. */
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_chacha_thumb2_setkey_same_keyb_ytes
BEQ L_chacha_thumb2_setkey_same_key_bytes
#else
BEQ.N L_chacha_thumb2_setkey_same_keyb_ytes
BEQ.N L_chacha_thumb2_setkey_same_key_bytes
#endif
/* Update key pointer for next 16 bytes. */
ADD r1, r1, r2
@@ -102,7 +102,7 @@ wc_chacha_setkey:
LDR r4, [r1, #4]
LDR r5, [r1, #8]
LDR r6, [r1, #12]
L_chacha_thumb2_setkey_same_keyb_ytes:
L_chacha_thumb2_setkey_same_key_bytes:
STM r0, {r3, r4, r5, r6}
POP {r4, r5, r6, r7, pc}
/* Cycle Count = 60 */
+5 -5
View File
@@ -124,11 +124,11 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key,
"STM %[x]!, {r3, r4, r5, r6}\n\t"
/* Next 16 bytes of key. */
#if defined(__GNUC__)
"BEQ L_chacha_thumb2_setkey_same_keyb_ytes_%=\n\t"
"BEQ L_chacha_thumb2_setkey_same_key_bytes_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BEQ.N L_chacha_thumb2_setkey_same_keyb_ytes\n\t"
"BEQ.N L_chacha_thumb2_setkey_same_key_bytes\n\t"
#else
"BEQ.N L_chacha_thumb2_setkey_same_keyb_ytes_%=\n\t"
"BEQ.N L_chacha_thumb2_setkey_same_key_bytes_%=\n\t"
#endif
/* Update key pointer for next 16 bytes. */
"ADD %[key], %[key], %[keySz]\n\t"
@@ -138,9 +138,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key,
"LDR r6, [%[key], #12]\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_chacha_thumb2_setkey_same_keyb_ytes:\n\t"
"L_chacha_thumb2_setkey_same_key_bytes:\n\t"
#else
"L_chacha_thumb2_setkey_same_keyb_ytes_%=:\n\t"
"L_chacha_thumb2_setkey_same_key_bytes_%=:\n\t"
#endif
"STM %[x], {r3, r4, r5, r6}\n\t"
: [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz),
+48 -48
View File
@@ -269,7 +269,7 @@ L_transform_sha512_avx1_start:
movq %r10, %rax
addq %rcx, %r14
vpaddq %xmm0, %xmm8, %xmm0
# msg_sched done: 0-3
# msg_sched done: 0-1
# msg_sched: 2-3
# rnd_0: 0 - 0
rorq $23, %rax
@@ -360,7 +360,7 @@ L_transform_sha512_avx1_start:
movq %r8, %rax
addq %rcx, %r12
vpaddq %xmm1, %xmm8, %xmm1
# msg_sched done: 2-5
# msg_sched done: 2-3
# msg_sched: 4-5
# rnd_0: 0 - 0
rorq $23, %rax
@@ -451,7 +451,7 @@ L_transform_sha512_avx1_start:
movq %r14, %rax
addq %rcx, %r10
vpaddq %xmm2, %xmm8, %xmm2
# msg_sched done: 4-7
# msg_sched done: 4-5
# msg_sched: 6-7
# rnd_0: 0 - 0
rorq $23, %rax
@@ -542,7 +542,7 @@ L_transform_sha512_avx1_start:
movq %r12, %rax
addq %rcx, %r8
vpaddq %xmm3, %xmm8, %xmm3
# msg_sched done: 6-9
# msg_sched done: 6-7
# msg_sched: 8-9
# rnd_0: 0 - 0
rorq $23, %rax
@@ -633,7 +633,7 @@ L_transform_sha512_avx1_start:
movq %r10, %rax
addq %rcx, %r14
vpaddq %xmm4, %xmm8, %xmm4
# msg_sched done: 8-11
# msg_sched done: 8-9
# msg_sched: 10-11
# rnd_0: 0 - 0
rorq $23, %rax
@@ -724,7 +724,7 @@ L_transform_sha512_avx1_start:
movq %r8, %rax
addq %rcx, %r12
vpaddq %xmm5, %xmm8, %xmm5
# msg_sched done: 10-13
# msg_sched done: 10-11
# msg_sched: 12-13
# rnd_0: 0 - 0
rorq $23, %rax
@@ -815,7 +815,7 @@ L_transform_sha512_avx1_start:
movq %r14, %rax
addq %rcx, %r10
vpaddq %xmm6, %xmm8, %xmm6
# msg_sched done: 12-15
# msg_sched done: 12-13
# msg_sched: 14-15
# rnd_0: 0 - 0
rorq $23, %rax
@@ -906,7 +906,7 @@ L_transform_sha512_avx1_start:
movq %r12, %rax
addq %rcx, %r8
vpaddq %xmm7, %xmm8, %xmm7
# msg_sched done: 14-17
# msg_sched done: 14-15
subl $0x01, 128(%rsp)
jne L_transform_sha512_avx1_start
vpaddq (%rsi), %xmm0, %xmm8
@@ -1547,7 +1547,7 @@ L_sha512_len_avx1_start:
movq %r10, %rax
addq %rcx, %r14
vpaddq %xmm0, %xmm8, %xmm0
# msg_sched done: 0-3
# msg_sched done: 0-1
# msg_sched: 2-3
# rnd_0: 0 - 0
rorq $23, %rax
@@ -1638,7 +1638,7 @@ L_sha512_len_avx1_start:
movq %r8, %rax
addq %rcx, %r12
vpaddq %xmm1, %xmm8, %xmm1
# msg_sched done: 2-5
# msg_sched done: 2-3
# msg_sched: 4-5
# rnd_0: 0 - 0
rorq $23, %rax
@@ -1729,7 +1729,7 @@ L_sha512_len_avx1_start:
movq %r14, %rax
addq %rcx, %r10
vpaddq %xmm2, %xmm8, %xmm2
# msg_sched done: 4-7
# msg_sched done: 4-5
# msg_sched: 6-7
# rnd_0: 0 - 0
rorq $23, %rax
@@ -1820,7 +1820,7 @@ L_sha512_len_avx1_start:
movq %r12, %rax
addq %rcx, %r8
vpaddq %xmm3, %xmm8, %xmm3
# msg_sched done: 6-9
# msg_sched done: 6-7
# msg_sched: 8-9
# rnd_0: 0 - 0
rorq $23, %rax
@@ -1911,7 +1911,7 @@ L_sha512_len_avx1_start:
movq %r10, %rax
addq %rcx, %r14
vpaddq %xmm4, %xmm8, %xmm4
# msg_sched done: 8-11
# msg_sched done: 8-9
# msg_sched: 10-11
# rnd_0: 0 - 0
rorq $23, %rax
@@ -2002,7 +2002,7 @@ L_sha512_len_avx1_start:
movq %r8, %rax
addq %rcx, %r12
vpaddq %xmm5, %xmm8, %xmm5
# msg_sched done: 10-13
# msg_sched done: 10-11
# msg_sched: 12-13
# rnd_0: 0 - 0
rorq $23, %rax
@@ -2093,7 +2093,7 @@ L_sha512_len_avx1_start:
movq %r14, %rax
addq %rcx, %r10
vpaddq %xmm6, %xmm8, %xmm6
# msg_sched done: 12-15
# msg_sched done: 12-13
# msg_sched: 14-15
# rnd_0: 0 - 0
rorq $23, %rax
@@ -2184,7 +2184,7 @@ L_sha512_len_avx1_start:
movq %r12, %rax
addq %rcx, %r8
vpaddq %xmm7, %xmm8, %xmm7
# msg_sched done: 14-17
# msg_sched done: 14-15
movq 136(%rsp), %rdx
vpaddq (%rdx), %xmm0, %xmm8
vpaddq 16(%rdx), %xmm1, %xmm9
@@ -2892,7 +2892,7 @@ L_transform_sha512_avx1_rorx_start:
addq %rax, %r14
xorq %r8, %rdx
vpaddq %xmm0, %xmm8, %xmm0
# msg_sched done: 0-3
# msg_sched done: 0-1
# msg_sched: 2-3
# rnd_0: 0 - 0
rorxq $14, %r10, %rax
@@ -2978,7 +2978,7 @@ L_transform_sha512_avx1_rorx_start:
addq %rax, %r12
xorq %r14, %rdx
vpaddq %xmm1, %xmm8, %xmm1
# msg_sched done: 2-5
# msg_sched done: 2-3
# msg_sched: 4-5
# rnd_0: 0 - 0
rorxq $14, %r8, %rax
@@ -3064,7 +3064,7 @@ L_transform_sha512_avx1_rorx_start:
addq %rax, %r10
xorq %r12, %rdx
vpaddq %xmm2, %xmm8, %xmm2
# msg_sched done: 4-7
# msg_sched done: 4-5
# msg_sched: 6-7
# rnd_0: 0 - 0
rorxq $14, %r14, %rax
@@ -3150,7 +3150,7 @@ L_transform_sha512_avx1_rorx_start:
addq %rax, %r8
xorq %r10, %rdx
vpaddq %xmm3, %xmm8, %xmm3
# msg_sched done: 6-9
# msg_sched done: 6-7
# msg_sched: 8-9
# rnd_0: 0 - 0
rorxq $14, %r12, %rax
@@ -3236,7 +3236,7 @@ L_transform_sha512_avx1_rorx_start:
addq %rax, %r14
xorq %r8, %rdx
vpaddq %xmm4, %xmm8, %xmm4
# msg_sched done: 8-11
# msg_sched done: 8-9
# msg_sched: 10-11
# rnd_0: 0 - 0
rorxq $14, %r10, %rax
@@ -3322,7 +3322,7 @@ L_transform_sha512_avx1_rorx_start:
addq %rax, %r12
xorq %r14, %rdx
vpaddq %xmm5, %xmm8, %xmm5
# msg_sched done: 10-13
# msg_sched done: 10-11
# msg_sched: 12-13
# rnd_0: 0 - 0
rorxq $14, %r8, %rax
@@ -3408,7 +3408,7 @@ L_transform_sha512_avx1_rorx_start:
addq %rax, %r10
xorq %r12, %rdx
vpaddq %xmm6, %xmm8, %xmm6
# msg_sched done: 12-15
# msg_sched done: 12-13
# msg_sched: 14-15
# rnd_0: 0 - 0
rorxq $14, %r14, %rax
@@ -3494,7 +3494,7 @@ L_transform_sha512_avx1_rorx_start:
addq %rax, %r8
xorq %r10, %rdx
vpaddq %xmm7, %xmm8, %xmm7
# msg_sched done: 14-17
# msg_sched done: 14-15
vpaddq (%rsi), %xmm0, %xmm8
vpaddq 16(%rsi), %xmm1, %xmm9
vmovdqu %xmm8, (%rsp)
@@ -4099,7 +4099,7 @@ L_sha512_len_avx1_rorx_start:
addq %rax, %r14
xorq %r8, %rdx
vpaddq %xmm0, %xmm8, %xmm0
# msg_sched done: 0-3
# msg_sched done: 0-1
# msg_sched: 2-3
# rnd_0: 0 - 0
rorxq $14, %r10, %rax
@@ -4185,7 +4185,7 @@ L_sha512_len_avx1_rorx_start:
addq %rax, %r12
xorq %r14, %rdx
vpaddq %xmm1, %xmm8, %xmm1
# msg_sched done: 2-5
# msg_sched done: 2-3
# msg_sched: 4-5
# rnd_0: 0 - 0
rorxq $14, %r8, %rax
@@ -4271,7 +4271,7 @@ L_sha512_len_avx1_rorx_start:
addq %rax, %r10
xorq %r12, %rdx
vpaddq %xmm2, %xmm8, %xmm2
# msg_sched done: 4-7
# msg_sched done: 4-5
# msg_sched: 6-7
# rnd_0: 0 - 0
rorxq $14, %r14, %rax
@@ -4357,7 +4357,7 @@ L_sha512_len_avx1_rorx_start:
addq %rax, %r8
xorq %r10, %rdx
vpaddq %xmm3, %xmm8, %xmm3
# msg_sched done: 6-9
# msg_sched done: 6-7
# msg_sched: 8-9
# rnd_0: 0 - 0
rorxq $14, %r12, %rax
@@ -4443,7 +4443,7 @@ L_sha512_len_avx1_rorx_start:
addq %rax, %r14
xorq %r8, %rdx
vpaddq %xmm4, %xmm8, %xmm4
# msg_sched done: 8-11
# msg_sched done: 8-9
# msg_sched: 10-11
# rnd_0: 0 - 0
rorxq $14, %r10, %rax
@@ -4529,7 +4529,7 @@ L_sha512_len_avx1_rorx_start:
addq %rax, %r12
xorq %r14, %rdx
vpaddq %xmm5, %xmm8, %xmm5
# msg_sched done: 10-13
# msg_sched done: 10-11
# msg_sched: 12-13
# rnd_0: 0 - 0
rorxq $14, %r8, %rax
@@ -4615,7 +4615,7 @@ L_sha512_len_avx1_rorx_start:
addq %rax, %r10
xorq %r12, %rdx
vpaddq %xmm6, %xmm8, %xmm6
# msg_sched done: 12-15
# msg_sched done: 12-13
# msg_sched: 14-15
# rnd_0: 0 - 0
rorxq $14, %r14, %rax
@@ -4701,7 +4701,7 @@ L_sha512_len_avx1_rorx_start:
addq %rax, %r8
xorq %r10, %rdx
vpaddq %xmm7, %xmm8, %xmm7
# msg_sched done: 14-17
# msg_sched done: 14-15
movq 136(%rsp), %rcx
vpaddq (%rcx), %xmm0, %xmm8
vpaddq 16(%rcx), %xmm1, %xmm9
@@ -6585,7 +6585,7 @@ L_sha512_len_avx2_start:
movq %r10, %rax
addq %rcx, %r14
vpaddq %ymm0, %ymm8, %ymm0
# msg_sched done: 0-3
# msg_sched done: 0-1
# msg_sched: 4-5
rorq $23, %rax
vpalignr $8, %ymm1, %ymm2, %ymm12
@@ -6662,7 +6662,7 @@ L_sha512_len_avx2_start:
movq %r8, %rax
addq %rcx, %r12
vpaddq %ymm1, %ymm8, %ymm1
# msg_sched done: 4-7
# msg_sched done: 4-5
# msg_sched: 8-9
rorq $23, %rax
vpalignr $8, %ymm2, %ymm3, %ymm12
@@ -6739,7 +6739,7 @@ L_sha512_len_avx2_start:
movq %r14, %rax
addq %rcx, %r10
vpaddq %ymm2, %ymm8, %ymm2
# msg_sched done: 8-11
# msg_sched done: 8-9
# msg_sched: 12-13
rorq $23, %rax
vpalignr $8, %ymm3, %ymm4, %ymm12
@@ -6816,7 +6816,7 @@ L_sha512_len_avx2_start:
movq %r12, %rax
addq %rcx, %r8
vpaddq %ymm3, %ymm8, %ymm3
# msg_sched done: 12-15
# msg_sched done: 12-13
# msg_sched: 16-17
rorq $23, %rax
vpalignr $8, %ymm4, %ymm5, %ymm12
@@ -6893,7 +6893,7 @@ L_sha512_len_avx2_start:
movq %r10, %rax
addq %rcx, %r14
vpaddq %ymm4, %ymm8, %ymm4
# msg_sched done: 16-19
# msg_sched done: 16-17
# msg_sched: 20-21
rorq $23, %rax
vpalignr $8, %ymm5, %ymm6, %ymm12
@@ -6970,7 +6970,7 @@ L_sha512_len_avx2_start:
movq %r8, %rax
addq %rcx, %r12
vpaddq %ymm5, %ymm8, %ymm5
# msg_sched done: 20-23
# msg_sched done: 20-21
# msg_sched: 24-25
rorq $23, %rax
vpalignr $8, %ymm6, %ymm7, %ymm12
@@ -7047,7 +7047,7 @@ L_sha512_len_avx2_start:
movq %r14, %rax
addq %rcx, %r10
vpaddq %ymm6, %ymm8, %ymm6
# msg_sched done: 24-27
# msg_sched done: 24-25
# msg_sched: 28-29
rorq $23, %rax
vpalignr $8, %ymm7, %ymm0, %ymm12
@@ -7124,7 +7124,7 @@ L_sha512_len_avx2_start:
movq %r12, %rax
addq %rcx, %r8
vpaddq %ymm7, %ymm8, %ymm7
# msg_sched done: 28-31
# msg_sched done: 28-29
addq $0x100, %rsi
addq $0x100, %rbp
cmpq L_avx2_sha512_k_2_end(%rip), %rsi
@@ -9381,7 +9381,7 @@ L_sha512_len_avx2_rorx_start:
addq %rax, %r14
xorq %r8, %rdx
vpaddq %ymm0, %ymm8, %ymm0
# msg_sched done: 0-3
# msg_sched done: 0-1
# msg_sched: 4-5
rorxq $14, %r10, %rax
rorxq $18, %r10, %rcx
@@ -9454,7 +9454,7 @@ L_sha512_len_avx2_rorx_start:
addq %rax, %r12
xorq %r14, %rdx
vpaddq %ymm1, %ymm8, %ymm1
# msg_sched done: 4-7
# msg_sched done: 4-5
# msg_sched: 8-9
rorxq $14, %r8, %rax
rorxq $18, %r8, %rcx
@@ -9527,7 +9527,7 @@ L_sha512_len_avx2_rorx_start:
addq %rax, %r10
xorq %r12, %rdx
vpaddq %ymm2, %ymm8, %ymm2
# msg_sched done: 8-11
# msg_sched done: 8-9
# msg_sched: 12-13
rorxq $14, %r14, %rax
rorxq $18, %r14, %rcx
@@ -9600,7 +9600,7 @@ L_sha512_len_avx2_rorx_start:
addq %rax, %r8
xorq %r10, %rdx
vpaddq %ymm3, %ymm8, %ymm3
# msg_sched done: 12-15
# msg_sched done: 12-13
# msg_sched: 16-17
rorxq $14, %r12, %rax
rorxq $18, %r12, %rcx
@@ -9673,7 +9673,7 @@ L_sha512_len_avx2_rorx_start:
addq %rax, %r14
xorq %r8, %rdx
vpaddq %ymm4, %ymm8, %ymm4
# msg_sched done: 16-19
# msg_sched done: 16-17
# msg_sched: 20-21
rorxq $14, %r10, %rax
rorxq $18, %r10, %rcx
@@ -9746,7 +9746,7 @@ L_sha512_len_avx2_rorx_start:
addq %rax, %r12
xorq %r14, %rdx
vpaddq %ymm5, %ymm8, %ymm5
# msg_sched done: 20-23
# msg_sched done: 20-21
# msg_sched: 24-25
rorxq $14, %r8, %rax
rorxq $18, %r8, %rcx
@@ -9819,7 +9819,7 @@ L_sha512_len_avx2_rorx_start:
addq %rax, %r10
xorq %r12, %rdx
vpaddq %ymm6, %ymm8, %ymm6
# msg_sched done: 24-27
# msg_sched done: 24-25
# msg_sched: 28-29
rorxq $14, %r14, %rax
rorxq $18, %r14, %rcx
@@ -9892,7 +9892,7 @@ L_sha512_len_avx2_rorx_start:
addq %rax, %r8
xorq %r10, %rdx
vpaddq %ymm7, %ymm8, %ymm7
# msg_sched done: 28-31
# msg_sched done: 28-29
addq $0x100, %rbp
addq $0x100, %rsi
cmpq L_avx2_rorx_sha512_k_2_end(%rip), %rbp
+98 -84
View File
@@ -5656,7 +5656,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
"mov r12, #0\n\t"
"add lr, %[a], #0x100\n\t"
"\n"
"L_sp_2048_sub_in_pkace_64_word_%=: \n\t"
"L_sp_2048_sub_in_place_64_word_%=: \n\t"
"rsbs r12, r12, #0\n\t"
"ldm %[a], {r2, r3, r4, r5}\n\t"
"ldm %[b]!, {r6, r7, r8, r9}\n\t"
@@ -5667,7 +5667,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
"stm %[a]!, {r2, r3, r4, r5}\n\t"
"sbc r12, r12, r12\n\t"
"cmp %[a], lr\n\t"
"bne L_sp_2048_sub_in_pkace_64_word_%=\n\t"
"bne L_sp_2048_sub_in_place_64_word_%=\n\t"
"mov %[a], r12\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [a] "+r" (a), [b] "+r" (b)
@@ -6162,7 +6162,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
"mov r12, #0\n\t"
"add lr, %[a], #0x80\n\t"
"\n"
"L_sp_2048_sub_in_pkace_32_word_%=: \n\t"
"L_sp_2048_sub_in_place_32_word_%=: \n\t"
"rsbs r12, r12, #0\n\t"
"ldm %[a], {r2, r3, r4, r5}\n\t"
"ldm %[b]!, {r6, r7, r8, r9}\n\t"
@@ -6173,7 +6173,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
"stm %[a]!, {r2, r3, r4, r5}\n\t"
"sbc r12, r12, r12\n\t"
"cmp %[a], lr\n\t"
"bne L_sp_2048_sub_in_pkace_32_word_%=\n\t"
"bne L_sp_2048_sub_in_place_32_word_%=\n\t"
"mov %[a], r12\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [a] "+r" (a), [b] "+r" (b)
@@ -9943,15 +9943,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32(
#endif
"ldr r10, [%[a], #124]\n\t"
"lsl r6, r8, #16\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r5, r5, r7\n\t"
"adcs r4, r3, #0\n\t"
"adcs r4, r4, #0\n\t"
"mov r3, #0\n\t"
"adc r3, r3, r3\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
"lsr r7, r6, #16\n\t"
"lsl r6, r6, #16\n\t"
@@ -9959,11 +9959,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32(
"adcs r4, r4, r7\n\t"
"adc r3, r3, #0\n\t"
"mov r6, r8\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r4, r4, r7\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"adc r3, r3, #0\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
@@ -14909,15 +14909,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64(
#endif
"ldr r10, [%[a], #252]\n\t"
"lsl r6, r8, #16\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r5, r5, r7\n\t"
"adcs r4, r3, #0\n\t"
"adcs r4, r4, #0\n\t"
"mov r3, #0\n\t"
"adc r3, r3, r3\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
"lsr r7, r6, #16\n\t"
"lsl r6, r6, #16\n\t"
@@ -14925,11 +14925,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64(
"adcs r4, r4, r7\n\t"
"adc r3, r3, #0\n\t"
"mov r6, r8\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r4, r4, r7\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"adc r3, r3, #0\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
@@ -29071,7 +29071,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
"mov r12, #0\n\t"
"add lr, %[a], #0x180\n\t"
"\n"
"L_sp_3072_sub_in_pkace_96_word_%=: \n\t"
"L_sp_3072_sub_in_place_96_word_%=: \n\t"
"rsbs r12, r12, #0\n\t"
"ldm %[a], {r2, r3, r4, r5}\n\t"
"ldm %[b]!, {r6, r7, r8, r9}\n\t"
@@ -29082,7 +29082,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
"stm %[a]!, {r2, r3, r4, r5}\n\t"
"sbc r12, r12, r12\n\t"
"cmp %[a], lr\n\t"
"bne L_sp_3072_sub_in_pkace_96_word_%=\n\t"
"bne L_sp_3072_sub_in_place_96_word_%=\n\t"
"mov %[a], r12\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [a] "+r" (a), [b] "+r" (b)
@@ -29577,7 +29577,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
"mov r12, #0\n\t"
"add lr, %[a], #0xc0\n\t"
"\n"
"L_sp_3072_sub_in_pkace_48_word_%=: \n\t"
"L_sp_3072_sub_in_place_48_word_%=: \n\t"
"rsbs r12, r12, #0\n\t"
"ldm %[a], {r2, r3, r4, r5}\n\t"
"ldm %[b]!, {r6, r7, r8, r9}\n\t"
@@ -29588,7 +29588,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
"stm %[a]!, {r2, r3, r4, r5}\n\t"
"sbc r12, r12, r12\n\t"
"cmp %[a], lr\n\t"
"bne L_sp_3072_sub_in_pkace_48_word_%=\n\t"
"bne L_sp_3072_sub_in_place_48_word_%=\n\t"
"mov %[a], r12\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [a] "+r" (a), [b] "+r" (b)
@@ -34902,15 +34902,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48(
#endif
"ldr r10, [%[a], #188]\n\t"
"lsl r6, r8, #16\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r5, r5, r7\n\t"
"adcs r4, r3, #0\n\t"
"adcs r4, r4, #0\n\t"
"mov r3, #0\n\t"
"adc r3, r3, r3\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
"lsr r7, r6, #16\n\t"
"lsl r6, r6, #16\n\t"
@@ -34918,11 +34918,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48(
"adcs r4, r4, r7\n\t"
"adc r3, r3, #0\n\t"
"mov r6, r8\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r4, r4, r7\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"adc r3, r3, #0\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
@@ -41804,15 +41804,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96(
#endif
"ldr r10, [%[a], #380]\n\t"
"lsl r6, r8, #16\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r5, r5, r7\n\t"
"adcs r4, r3, #0\n\t"
"adcs r4, r4, #0\n\t"
"mov r3, #0\n\t"
"adc r3, r3, r3\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
"lsr r7, r6, #16\n\t"
"lsl r6, r6, #16\n\t"
@@ -41820,11 +41820,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96(
"adcs r4, r4, r7\n\t"
"adc r3, r3, #0\n\t"
"mov r6, r8\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r4, r4, r7\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"adc r3, r3, #0\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
@@ -47639,7 +47639,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
"mov r12, #0\n\t"
"add lr, %[a], #0x200\n\t"
"\n"
"L_sp_4096_sub_in_pkace_128_word_%=: \n\t"
"L_sp_4096_sub_in_place_128_word_%=: \n\t"
"rsbs r12, r12, #0\n\t"
"ldm %[a], {r2, r3, r4, r5}\n\t"
"ldm %[b]!, {r6, r7, r8, r9}\n\t"
@@ -47650,7 +47650,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
"stm %[a]!, {r2, r3, r4, r5}\n\t"
"sbc r12, r12, r12\n\t"
"cmp %[a], lr\n\t"
"bne L_sp_4096_sub_in_pkace_128_word_%=\n\t"
"bne L_sp_4096_sub_in_place_128_word_%=\n\t"
"mov %[a], r12\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [a] "+r" (a), [b] "+r" (b)
@@ -56588,15 +56588,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128(
#endif
"ldr r10, [%[a], #508]\n\t"
"lsl r6, r8, #16\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r5, r5, r7\n\t"
"adcs r4, r3, #0\n\t"
"adcs r4, r4, #0\n\t"
"mov r3, #0\n\t"
"adc r3, r3, r3\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
"lsr r7, r6, #16\n\t"
"lsl r6, r6, #16\n\t"
@@ -56604,11 +56604,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128(
"adcs r4, r4, r7\n\t"
"adc r3, r3, #0\n\t"
"mov r6, r8\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r4, r4, r7\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"adc r3, r3, #0\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
@@ -73172,15 +73172,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a,
#endif
"ldr r10, [%[a], #28]\n\t"
"lsl r6, r8, #16\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r5, r5, r7\n\t"
"adcs r4, r3, #0\n\t"
"adcs r4, r4, #0\n\t"
"mov r3, #0\n\t"
"adc r3, r3, r3\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
"lsr r7, r6, #16\n\t"
"lsl r6, r6, #16\n\t"
@@ -73188,11 +73188,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a,
"adcs r4, r4, r7\n\t"
"adc r3, r3, #0\n\t"
"mov r6, r8\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r4, r4, r7\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"adc r3, r3, #0\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
@@ -73860,15 +73860,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8(
#endif
"ldr r10, [%[a], #28]\n\t"
"lsl r6, r8, #16\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r5, r5, r7\n\t"
"adcs r4, r3, #0\n\t"
"adcs r4, r4, #0\n\t"
"mov r3, #0\n\t"
"adc r3, r3, r3\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
"lsr r7, r6, #16\n\t"
"lsl r6, r6, #16\n\t"
@@ -73876,11 +73876,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8(
"adcs r4, r4, r7\n\t"
"adc r3, r3, #0\n\t"
"mov r6, r8\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r4, r4, r7\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"adc r3, r3, #0\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
@@ -78284,7 +78284,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_in_place_8(sp_digit* a,
"mov r12, #0\n\t"
"add lr, %[a], #32\n\t"
"\n"
"L_sp_256_sub_in_pkace_8_word_%=: \n\t"
"L_sp_256_sub_in_place_8_word_%=: \n\t"
"rsbs r12, r12, #0\n\t"
"ldm %[a], {r2, r3, r4, r5}\n\t"
"ldm %[b]!, {r6, r7, r8, r9}\n\t"
@@ -78295,7 +78295,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_in_place_8(sp_digit* a,
"stm %[a]!, {r2, r3, r4, r5}\n\t"
"sbc r12, r12, r12\n\t"
"cmp %[a], lr\n\t"
"bne L_sp_256_sub_in_pkace_8_word_%=\n\t"
"bne L_sp_256_sub_in_place_8_word_%=\n\t"
"mov %[a], r12\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [a] "+r" (a), [b] "+r" (b)
@@ -91143,15 +91143,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a,
#endif
"ldr r10, [%[a], #44]\n\t"
"lsl r6, r8, #16\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r5, r5, r7\n\t"
"adcs r4, r3, #0\n\t"
"adcs r4, r4, #0\n\t"
"mov r3, #0\n\t"
"adc r3, r3, r3\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
"lsr r7, r6, #16\n\t"
"lsl r6, r6, #16\n\t"
@@ -91159,11 +91159,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a,
"adcs r4, r4, r7\n\t"
"adc r3, r3, #0\n\t"
"mov r6, r8\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r4, r4, r7\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"adc r3, r3, #0\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
@@ -96341,7 +96341,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_in_place_12(sp_digit* a,
"mov r12, #0\n\t"
"add lr, %[a], #48\n\t"
"\n"
"L_sp_384_sub_in_pkace_12_word_%=: \n\t"
"L_sp_384_sub_in_place_12_word_%=: \n\t"
"rsbs r12, r12, #0\n\t"
"ldm %[a], {r2, r3, r4, r5}\n\t"
"ldm %[b]!, {r6, r7, r8, r9}\n\t"
@@ -96352,7 +96352,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_in_place_12(sp_digit* a,
"stm %[a]!, {r2, r3, r4, r5}\n\t"
"sbc r12, r12, r12\n\t"
"cmp %[a], lr\n\t"
"bne L_sp_384_sub_in_pkace_12_word_%=\n\t"
"bne L_sp_384_sub_in_place_12_word_%=\n\t"
"mov %[a], r12\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [a] "+r" (a), [b] "+r" (b)
@@ -118074,15 +118074,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17(
#endif
"ldr r10, [%[a], #64]\n\t"
"lsl r6, r8, #16\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r4, r4, r7\n\t"
"adcs r5, r3, #0\n\t"
"adcs r5, r5, #0\n\t"
"mov r3, #0\n\t"
"adc r3, r3, r3\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
"lsr r7, r6, #16\n\t"
"lsl r6, r6, #16\n\t"
@@ -118090,11 +118090,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17(
"adcs r5, r5, r7\n\t"
"adc r3, r3, #0\n\t"
"mov r6, r8\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r5, r5, r7\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"adc r3, r3, #0\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
@@ -119416,9 +119416,23 @@ WC_OMIT_FRAME_POINTER static void sp_521_mont_tpl_17(sp_digit* r,
"sub %[r], %[r], #0x44\n\t"
"ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
"adds r4, r4, r2\n\t"
"adcs r4, r4, #0\n\t"
"adcs r5, r5, #0\n\t"
"adcs r6, r6, #0\n\t"
"adcs r7, r7, #0\n\t"
"adcs r8, r8, #0\n\t"
"adcs r9, r9, #0\n\t"
"adcs r10, r10, #0\n\t"
"adcs r11, r11, #0\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
"ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
"adcs r4, r4, #0\n\t"
"adcs r5, r5, #0\n\t"
"adcs r6, r6, #0\n\t"
"adcs r7, r7, #0\n\t"
"adcs r8, r8, #0\n\t"
"adcs r9, r9, #0\n\t"
"adcs r10, r10, #0\n\t"
"adcs r11, r11, #0\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
"ldm %[r], {r4}\n\t"
"adcs r4, r4, #0\n\t"
@@ -124063,77 +124077,77 @@ WC_OMIT_FRAME_POINTER static void sp_521_rshift_17(sp_digit* r,
"lsr r5, r5, %[n]\n\t"
"orr r4, r4, r3\n\t"
"ldr r6, [%[a], #8]\n\t"
"str r4, [%[a]]\n\t"
"str r4, [%[r]]\n\t"
"lsl r3, r6, r12\n\t"
"lsr r6, r6, %[n]\n\t"
"orr r5, r5, r3\n\t"
"ldr r4, [%[a], #12]\n\t"
"str r5, [%[a], #4]\n\t"
"str r5, [%[r], #4]\n\t"
"lsl r3, r4, r12\n\t"
"lsr r4, r4, %[n]\n\t"
"orr r6, r6, r3\n\t"
"ldr r5, [%[a], #16]\n\t"
"str r6, [%[a], #8]\n\t"
"str r6, [%[r], #8]\n\t"
"lsl r3, r5, r12\n\t"
"lsr r5, r5, %[n]\n\t"
"orr r4, r4, r3\n\t"
"ldr r6, [%[a], #20]\n\t"
"str r4, [%[a], #12]\n\t"
"str r4, [%[r], #12]\n\t"
"lsl r3, r6, r12\n\t"
"lsr r6, r6, %[n]\n\t"
"orr r5, r5, r3\n\t"
"ldr r4, [%[a], #24]\n\t"
"str r5, [%[a], #16]\n\t"
"str r5, [%[r], #16]\n\t"
"lsl r3, r4, r12\n\t"
"lsr r4, r4, %[n]\n\t"
"orr r6, r6, r3\n\t"
"ldr r5, [%[a], #28]\n\t"
"str r6, [%[a], #20]\n\t"
"str r6, [%[r], #20]\n\t"
"lsl r3, r5, r12\n\t"
"lsr r5, r5, %[n]\n\t"
"orr r4, r4, r3\n\t"
"ldr r6, [%[a], #32]\n\t"
"str r4, [%[a], #24]\n\t"
"str r4, [%[r], #24]\n\t"
"lsl r3, r6, r12\n\t"
"lsr r6, r6, %[n]\n\t"
"orr r5, r5, r3\n\t"
"ldr r4, [%[a], #36]\n\t"
"str r5, [%[a], #28]\n\t"
"str r5, [%[r], #28]\n\t"
"lsl r3, r4, r12\n\t"
"lsr r4, r4, %[n]\n\t"
"orr r6, r6, r3\n\t"
"ldr r5, [%[a], #40]\n\t"
"str r6, [%[a], #32]\n\t"
"str r6, [%[r], #32]\n\t"
"lsl r3, r5, r12\n\t"
"lsr r5, r5, %[n]\n\t"
"orr r4, r4, r3\n\t"
"ldr r6, [%[a], #44]\n\t"
"str r4, [%[a], #36]\n\t"
"str r4, [%[r], #36]\n\t"
"lsl r3, r6, r12\n\t"
"lsr r6, r6, %[n]\n\t"
"orr r5, r5, r3\n\t"
"ldr r4, [%[a], #48]\n\t"
"str r5, [%[a], #40]\n\t"
"str r5, [%[r], #40]\n\t"
"lsl r3, r4, r12\n\t"
"lsr r4, r4, %[n]\n\t"
"orr r6, r6, r3\n\t"
"ldr r5, [%[a], #52]\n\t"
"str r6, [%[a], #44]\n\t"
"str r6, [%[r], #44]\n\t"
"lsl r3, r5, r12\n\t"
"lsr r5, r5, %[n]\n\t"
"orr r4, r4, r3\n\t"
"ldr r6, [%[a], #56]\n\t"
"str r4, [%[a], #48]\n\t"
"str r4, [%[r], #48]\n\t"
"lsl r3, r6, r12\n\t"
"lsr r6, r6, %[n]\n\t"
"orr r5, r5, r3\n\t"
"ldr r4, [%[a], #60]\n\t"
"str r5, [%[a], #52]\n\t"
"str r5, [%[r], #52]\n\t"
"lsl r3, r4, r12\n\t"
"lsr r4, r4, %[n]\n\t"
"orr r6, r6, r3\n\t"
"ldr r5, [%[a], #64]\n\t"
"str r6, [%[a], #56]\n\t"
"str r6, [%[r], #56]\n\t"
"lsl r3, r5, r12\n\t"
"lsr r5, r5, %[n]\n\t"
"orr r4, r4, r3\n\t"
@@ -124541,7 +124555,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_in_place_17(sp_digit* a,
"mov r12, #0\n\t"
"add lr, %[a], #0x40\n\t"
"\n"
"L_sp_521_sub_in_pkace_17_word_%=: \n\t"
"L_sp_521_sub_in_place_17_word_%=: \n\t"
"rsbs r12, r12, #0\n\t"
"ldm %[a], {r2, r3, r4, r5}\n\t"
"ldm %[b]!, {r6, r7, r8, r9}\n\t"
@@ -124552,7 +124566,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_in_place_17(sp_digit* a,
"stm %[a]!, {r2, r3, r4, r5}\n\t"
"sbc r12, r12, r12\n\t"
"cmp %[a], lr\n\t"
"bne L_sp_521_sub_in_pkace_17_word_%=\n\t"
"bne L_sp_521_sub_in_place_17_word_%=\n\t"
"rsbs r12, r12, #0\n\t"
"ldm %[a], {r2}\n\t"
"ldm %[b]!, {r6}\n\t"
@@ -144329,7 +144343,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_in_place_32(sp_digit* a,
"mov r12, #0\n\t"
"add lr, %[a], #0x80\n\t"
"\n"
"L_sp_1024_sub_in_pkace_32_word_%=: \n\t"
"L_sp_1024_sub_in_place_32_word_%=: \n\t"
"rsbs r12, r12, #0\n\t"
"ldm %[a], {r2, r3, r4, r5}\n\t"
"ldm %[b]!, {r6, r7, r8, r9}\n\t"
@@ -144340,7 +144354,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_in_place_32(sp_digit* a,
"stm %[a]!, {r2, r3, r4, r5}\n\t"
"sbc r12, r12, r12\n\t"
"cmp %[a], lr\n\t"
"bne L_sp_1024_sub_in_pkace_32_word_%=\n\t"
"bne L_sp_1024_sub_in_place_32_word_%=\n\t"
"mov %[a], r12\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [a] "+r" (a), [b] "+r" (b)
@@ -147711,15 +147725,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32(
#endif
"ldr r10, [%[a], #124]\n\t"
"lsl r6, r8, #16\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r5, r5, r7\n\t"
"adcs r4, r3, #0\n\t"
"adcs r4, r4, #0\n\t"
"mov r3, #0\n\t"
"adc r3, r3, r3\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
"lsr r7, r6, #16\n\t"
"lsl r6, r6, #16\n\t"
@@ -147727,11 +147741,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32(
"adcs r4, r4, r7\n\t"
"adc r3, r3, #0\n\t"
"mov r6, r8\n\t"
"lsr r7, r11, #16\n\t"
"lsr r7, r7, #16\n\t"
"lsr r6, r6, #16\n\t"
"mul r7, r6, r7\n\t"
"adds r4, r4, r7\n\t"
"lsl r7, r11, #16\n\t"
"lsl r7, r7, #16\n\t"
"adc r3, r3, #0\n\t"
"lsr r7, r7, #16\n\t"
"mul r6, r7, r6\n\t"
+75 -61
View File
@@ -2357,9 +2357,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
"ADD r11, %[a], #0x100\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_sp_2048_sub_in_pkace_64_word:\n\t"
"L_sp_2048_sub_in_place_64_word:\n\t"
#else
"L_sp_2048_sub_in_pkace_64_word_%=:\n\t"
"L_sp_2048_sub_in_place_64_word_%=:\n\t"
#endif
"RSBS r10, r10, #0x0\n\t"
"LDM %[a], {r2, r3, r4, r5}\n\t"
@@ -2372,11 +2372,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
"SBC r10, r10, r10\n\t"
"CMP %[a], r11\n\t"
#if defined(__GNUC__)
"BNE L_sp_2048_sub_in_pkace_64_word_%=\n\t"
"BNE L_sp_2048_sub_in_place_64_word_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_sp_2048_sub_in_pkace_64_word\n\t"
"BNE.N L_sp_2048_sub_in_place_64_word\n\t"
#else
"BNE.N L_sp_2048_sub_in_pkace_64_word_%=\n\t"
"BNE.N L_sp_2048_sub_in_place_64_word_%=\n\t"
#endif
"MOV %[a], r10\n\t"
: [a] "+r" (a), [b] "+r" (b)
@@ -2737,9 +2737,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
"ADD r11, %[a], #0x80\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_sp_2048_sub_in_pkace_32_word:\n\t"
"L_sp_2048_sub_in_place_32_word:\n\t"
#else
"L_sp_2048_sub_in_pkace_32_word_%=:\n\t"
"L_sp_2048_sub_in_place_32_word_%=:\n\t"
#endif
"RSBS r10, r10, #0x0\n\t"
"LDM %[a], {r2, r3, r4, r5}\n\t"
@@ -2752,11 +2752,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
"SBC r10, r10, r10\n\t"
"CMP %[a], r11\n\t"
#if defined(__GNUC__)
"BNE L_sp_2048_sub_in_pkace_32_word_%=\n\t"
"BNE L_sp_2048_sub_in_place_32_word_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_sp_2048_sub_in_pkace_32_word\n\t"
"BNE.N L_sp_2048_sub_in_place_32_word\n\t"
#else
"BNE.N L_sp_2048_sub_in_pkace_32_word_%=\n\t"
"BNE.N L_sp_2048_sub_in_place_32_word_%=\n\t"
#endif
"MOV %[a], r10\n\t"
: [a] "+r" (a), [b] "+r" (b)
@@ -13347,9 +13347,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
"ADD r11, %[a], #0x180\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_sp_3072_sub_in_pkace_96_word:\n\t"
"L_sp_3072_sub_in_place_96_word:\n\t"
#else
"L_sp_3072_sub_in_pkace_96_word_%=:\n\t"
"L_sp_3072_sub_in_place_96_word_%=:\n\t"
#endif
"RSBS r10, r10, #0x0\n\t"
"LDM %[a], {r2, r3, r4, r5}\n\t"
@@ -13362,11 +13362,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
"SBC r10, r10, r10\n\t"
"CMP %[a], r11\n\t"
#if defined(__GNUC__)
"BNE L_sp_3072_sub_in_pkace_96_word_%=\n\t"
"BNE L_sp_3072_sub_in_place_96_word_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_sp_3072_sub_in_pkace_96_word\n\t"
"BNE.N L_sp_3072_sub_in_place_96_word\n\t"
#else
"BNE.N L_sp_3072_sub_in_pkace_96_word_%=\n\t"
"BNE.N L_sp_3072_sub_in_place_96_word_%=\n\t"
#endif
"MOV %[a], r10\n\t"
: [a] "+r" (a), [b] "+r" (b)
@@ -13727,9 +13727,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
"ADD r11, %[a], #0xc0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_sp_3072_sub_in_pkace_48_word:\n\t"
"L_sp_3072_sub_in_place_48_word:\n\t"
#else
"L_sp_3072_sub_in_pkace_48_word_%=:\n\t"
"L_sp_3072_sub_in_place_48_word_%=:\n\t"
#endif
"RSBS r10, r10, #0x0\n\t"
"LDM %[a], {r2, r3, r4, r5}\n\t"
@@ -13742,11 +13742,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
"SBC r10, r10, r10\n\t"
"CMP %[a], r11\n\t"
#if defined(__GNUC__)
"BNE L_sp_3072_sub_in_pkace_48_word_%=\n\t"
"BNE L_sp_3072_sub_in_place_48_word_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_sp_3072_sub_in_pkace_48_word\n\t"
"BNE.N L_sp_3072_sub_in_place_48_word\n\t"
#else
"BNE.N L_sp_3072_sub_in_pkace_48_word_%=\n\t"
"BNE.N L_sp_3072_sub_in_place_48_word_%=\n\t"
#endif
"MOV %[a], r10\n\t"
: [a] "+r" (a), [b] "+r" (b)
@@ -23690,9 +23690,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
"ADD r11, %[a], #0x200\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_sp_4096_sub_in_pkace_128_word:\n\t"
"L_sp_4096_sub_in_place_128_word:\n\t"
#else
"L_sp_4096_sub_in_pkace_128_word_%=:\n\t"
"L_sp_4096_sub_in_place_128_word_%=:\n\t"
#endif
"RSBS r10, r10, #0x0\n\t"
"LDM %[a], {r2, r3, r4, r5}\n\t"
@@ -23705,11 +23705,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
"SBC r10, r10, r10\n\t"
"CMP %[a], r11\n\t"
#if defined(__GNUC__)
"BNE L_sp_4096_sub_in_pkace_128_word_%=\n\t"
"BNE L_sp_4096_sub_in_place_128_word_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_sp_4096_sub_in_pkace_128_word\n\t"
"BNE.N L_sp_4096_sub_in_place_128_word\n\t"
#else
"BNE.N L_sp_4096_sub_in_pkace_128_word_%=\n\t"
"BNE.N L_sp_4096_sub_in_place_128_word_%=\n\t"
#endif
"MOV %[a], r10\n\t"
: [a] "+r" (a), [b] "+r" (b)
@@ -39666,9 +39666,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_in_place_8(sp_digit* a,
"ADD r11, %[a], #0x20\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_sp_256_sub_in_pkace_8_word:\n\t"
"L_sp_256_sub_in_place_8_word:\n\t"
#else
"L_sp_256_sub_in_pkace_8_word_%=:\n\t"
"L_sp_256_sub_in_place_8_word_%=:\n\t"
#endif
"RSBS r10, r10, #0x0\n\t"
"LDM %[a], {r2, r3, r4, r5}\n\t"
@@ -39681,11 +39681,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_in_place_8(sp_digit* a,
"SBC r10, r10, r10\n\t"
"CMP %[a], r11\n\t"
#if defined(__GNUC__)
"BNE L_sp_256_sub_in_pkace_8_word_%=\n\t"
"BNE L_sp_256_sub_in_place_8_word_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_sp_256_sub_in_pkace_8_word\n\t"
"BNE.N L_sp_256_sub_in_place_8_word\n\t"
#else
"BNE.N L_sp_256_sub_in_pkace_8_word_%=\n\t"
"BNE.N L_sp_256_sub_in_place_8_word_%=\n\t"
#endif
"MOV %[a], r10\n\t"
: [a] "+r" (a), [b] "+r" (b)
@@ -49643,9 +49643,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_in_place_12(sp_digit* a,
"ADD r11, %[a], #0x30\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_sp_384_sub_in_pkace_12_word:\n\t"
"L_sp_384_sub_in_place_12_word:\n\t"
#else
"L_sp_384_sub_in_pkace_12_word_%=:\n\t"
"L_sp_384_sub_in_place_12_word_%=:\n\t"
#endif
"RSBS r10, r10, #0x0\n\t"
"LDM %[a], {r2, r3, r4, r5}\n\t"
@@ -49658,11 +49658,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_in_place_12(sp_digit* a,
"SBC r10, r10, r10\n\t"
"CMP %[a], r11\n\t"
#if defined(__GNUC__)
"BNE L_sp_384_sub_in_pkace_12_word_%=\n\t"
"BNE L_sp_384_sub_in_place_12_word_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_sp_384_sub_in_pkace_12_word\n\t"
"BNE.N L_sp_384_sub_in_place_12_word\n\t"
#else
"BNE.N L_sp_384_sub_in_pkace_12_word_%=\n\t"
"BNE.N L_sp_384_sub_in_place_12_word_%=\n\t"
#endif
"MOV %[a], r10\n\t"
: [a] "+r" (a), [b] "+r" (b)
@@ -57454,9 +57454,23 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_tpl_17(sp_digit* r,
"SUB %[r], %[r], #0x44\n\t"
"LDM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
"ADDS r4, r4, r2\n\t"
"ADCS r4, r4, #0x0\n\t"
"ADCS r5, r5, #0x0\n\t"
"ADCS r6, r6, #0x0\n\t"
"ADCS r7, r7, #0x0\n\t"
"ADCS r8, r8, #0x0\n\t"
"ADCS r9, r9, #0x0\n\t"
"ADCS r10, r10, #0x0\n\t"
"ADCS r11, r11, #0x0\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
"LDM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
"ADCS r4, r4, #0x0\n\t"
"ADCS r5, r5, #0x0\n\t"
"ADCS r6, r6, #0x0\n\t"
"ADCS r7, r7, #0x0\n\t"
"ADCS r8, r8, #0x0\n\t"
"ADCS r9, r9, #0x0\n\t"
"ADCS r10, r10, #0x0\n\t"
"ADCS r11, r11, #0x0\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
"LDM %[r], {r4}\n\t"
"ADCS r4, r4, #0x0\n\t"
@@ -62068,77 +62082,77 @@ WC_OMIT_FRAME_POINTER static void sp_521_rshift_17(sp_digit* r,
"LSR r5, r5, %[n]\n\t"
"ORR r4, r4, r3\n\t"
"LDR r6, [%[a], #8]\n\t"
"STR r4, [%[a]]\n\t"
"STR r4, [%[r]]\n\t"
"LSL r3, r6, r7\n\t"
"LSR r6, r6, %[n]\n\t"
"ORR r5, r5, r3\n\t"
"LDR r4, [%[a], #12]\n\t"
"STR r5, [%[a], #4]\n\t"
"STR r5, [%[r], #4]\n\t"
"LSL r3, r4, r7\n\t"
"LSR r4, r4, %[n]\n\t"
"ORR r6, r6, r3\n\t"
"LDR r5, [%[a], #16]\n\t"
"STR r6, [%[a], #8]\n\t"
"STR r6, [%[r], #8]\n\t"
"LSL r3, r5, r7\n\t"
"LSR r5, r5, %[n]\n\t"
"ORR r4, r4, r3\n\t"
"LDR r6, [%[a], #20]\n\t"
"STR r4, [%[a], #12]\n\t"
"STR r4, [%[r], #12]\n\t"
"LSL r3, r6, r7\n\t"
"LSR r6, r6, %[n]\n\t"
"ORR r5, r5, r3\n\t"
"LDR r4, [%[a], #24]\n\t"
"STR r5, [%[a], #16]\n\t"
"STR r5, [%[r], #16]\n\t"
"LSL r3, r4, r7\n\t"
"LSR r4, r4, %[n]\n\t"
"ORR r6, r6, r3\n\t"
"LDR r5, [%[a], #28]\n\t"
"STR r6, [%[a], #20]\n\t"
"STR r6, [%[r], #20]\n\t"
"LSL r3, r5, r7\n\t"
"LSR r5, r5, %[n]\n\t"
"ORR r4, r4, r3\n\t"
"LDR r6, [%[a], #32]\n\t"
"STR r4, [%[a], #24]\n\t"
"STR r4, [%[r], #24]\n\t"
"LSL r3, r6, r7\n\t"
"LSR r6, r6, %[n]\n\t"
"ORR r5, r5, r3\n\t"
"LDR r4, [%[a], #36]\n\t"
"STR r5, [%[a], #28]\n\t"
"STR r5, [%[r], #28]\n\t"
"LSL r3, r4, r7\n\t"
"LSR r4, r4, %[n]\n\t"
"ORR r6, r6, r3\n\t"
"LDR r5, [%[a], #40]\n\t"
"STR r6, [%[a], #32]\n\t"
"STR r6, [%[r], #32]\n\t"
"LSL r3, r5, r7\n\t"
"LSR r5, r5, %[n]\n\t"
"ORR r4, r4, r3\n\t"
"LDR r6, [%[a], #44]\n\t"
"STR r4, [%[a], #36]\n\t"
"STR r4, [%[r], #36]\n\t"
"LSL r3, r6, r7\n\t"
"LSR r6, r6, %[n]\n\t"
"ORR r5, r5, r3\n\t"
"LDR r4, [%[a], #48]\n\t"
"STR r5, [%[a], #40]\n\t"
"STR r5, [%[r], #40]\n\t"
"LSL r3, r4, r7\n\t"
"LSR r4, r4, %[n]\n\t"
"ORR r6, r6, r3\n\t"
"LDR r5, [%[a], #52]\n\t"
"STR r6, [%[a], #44]\n\t"
"STR r6, [%[r], #44]\n\t"
"LSL r3, r5, r7\n\t"
"LSR r5, r5, %[n]\n\t"
"ORR r4, r4, r3\n\t"
"LDR r6, [%[a], #56]\n\t"
"STR r4, [%[a], #48]\n\t"
"STR r4, [%[r], #48]\n\t"
"LSL r3, r6, r7\n\t"
"LSR r6, r6, %[n]\n\t"
"ORR r5, r5, r3\n\t"
"LDR r4, [%[a], #60]\n\t"
"STR r5, [%[a], #52]\n\t"
"STR r5, [%[r], #52]\n\t"
"LSL r3, r4, r7\n\t"
"LSR r4, r4, %[n]\n\t"
"ORR r6, r6, r3\n\t"
"LDR r5, [%[a], #64]\n\t"
"STR r6, [%[a], #56]\n\t"
"STR r6, [%[r], #56]\n\t"
"LSL r3, r5, r7\n\t"
"LSR r5, r5, %[n]\n\t"
"ORR r4, r4, r3\n\t"
@@ -62527,9 +62541,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_in_place_17(sp_digit* a,
"ADD r11, %[a], #0x40\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_sp_521_sub_in_pkace_17_word:\n\t"
"L_sp_521_sub_in_place_17_word:\n\t"
#else
"L_sp_521_sub_in_pkace_17_word_%=:\n\t"
"L_sp_521_sub_in_place_17_word_%=:\n\t"
#endif
"RSBS r10, r10, #0x0\n\t"
"LDM %[a], {r2, r3, r4, r5}\n\t"
@@ -62542,11 +62556,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_in_place_17(sp_digit* a,
"SBC r10, r10, r10\n\t"
"CMP %[a], r11\n\t"
#if defined(__GNUC__)
"BNE L_sp_521_sub_in_pkace_17_word_%=\n\t"
"BNE L_sp_521_sub_in_place_17_word_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_sp_521_sub_in_pkace_17_word\n\t"
"BNE.N L_sp_521_sub_in_place_17_word\n\t"
#else
"BNE.N L_sp_521_sub_in_pkace_17_word_%=\n\t"
"BNE.N L_sp_521_sub_in_place_17_word_%=\n\t"
#endif
"RSBS r10, r10, #0x0\n\t"
"LDM %[a], {r2}\n\t"
@@ -68852,9 +68866,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_in_place_32(sp_digit* a,
"ADD r11, %[a], #0x80\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_sp_1024_sub_in_pkace_32_word:\n\t"
"L_sp_1024_sub_in_place_32_word:\n\t"
#else
"L_sp_1024_sub_in_pkace_32_word_%=:\n\t"
"L_sp_1024_sub_in_place_32_word_%=:\n\t"
#endif
"RSBS r10, r10, #0x0\n\t"
"LDM %[a], {r2, r3, r4, r5}\n\t"
@@ -68867,11 +68881,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_in_place_32(sp_digit* a,
"SBC r10, r10, r10\n\t"
"CMP %[a], r11\n\t"
#if defined(__GNUC__)
"BNE L_sp_1024_sub_in_pkace_32_word_%=\n\t"
"BNE L_sp_1024_sub_in_place_32_word_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_sp_1024_sub_in_pkace_32_word\n\t"
"BNE.N L_sp_1024_sub_in_place_32_word\n\t"
#else
"BNE.N L_sp_1024_sub_in_pkace_32_word_%=\n\t"
"BNE.N L_sp_1024_sub_in_place_32_word_%=\n\t"
#endif
"MOV %[a], r10\n\t"
: [a] "+r" (a), [b] "+r" (b)
+5 -5
View File
@@ -66434,7 +66434,7 @@ _sp_521_mont_add_9:
/* Double a Montgomery form number (r = a + a % m).
*
* r Result of addition.
* a Number to souble in Montgomery form.
* a Number to double in Montgomery form.
* m Modulus (prime).
*/
#ifndef __APPLE__
@@ -76656,7 +76656,7 @@ _sp_1024_mont_add_16:
/* Double a Montgomery form number (r = a + a % m).
*
* r Result of addition.
* a Number to souble in Montgomery form.
* a Number to double in Montgomery form.
* m Modulus (prime).
*/
#ifndef __APPLE__
@@ -76830,7 +76830,7 @@ _sp_1024_mont_dbl_16:
/* Triple a Montgomery form number (r = a + a + a % m).
*
* r Result of addition.
* a Number to souble in Montgomery form.
* a Number to triple in Montgomery form.
* m Modulus (prime).
*/
#ifndef __APPLE__
@@ -77984,7 +77984,7 @@ _sp_1024_mont_add_avx2_16:
/* Double a Montgomery form number (r = a + a % m).
*
* r Result of addition.
* a Number to souble in Montgomery form.
* a Number to double in Montgomery form.
* m Modulus (prime).
*/
#ifndef __APPLE__
@@ -78142,7 +78142,7 @@ _sp_1024_mont_dbl_avx2_16:
/* Triple a Montgomery form number (r = a + a + a % m).
*
* r Result of addition.
* a Number to souble in Montgomery form.
* a Number to triple in Montgomery form.
* m Modulus (prime).
*/
#ifndef __APPLE__
+5 -5
View File
@@ -64790,7 +64790,7 @@ _text ENDS
; /* Double a Montgomery form number (r = a + a % m).
; *
; * r Result of addition.
; * a Number to souble in Montgomery form.
; * a Number to double in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
@@ -74754,7 +74754,7 @@ _text ENDS
; /* Double a Montgomery form number (r = a + a % m).
; *
; * r Result of addition.
; * a Number to souble in Montgomery form.
; * a Number to double in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
@@ -74919,7 +74919,7 @@ _text ENDS
; /* Triple a Montgomery form number (r = a + a + a % m).
; *
; * r Result of addition.
; * a Number to souble in Montgomery form.
; * a Number to triple in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
@@ -76037,7 +76037,7 @@ IFDEF HAVE_INTEL_AVX2
; /* Double a Montgomery form number (r = a + a % m).
; *
; * r Result of addition.
; * a Number to souble in Montgomery form.
; * a Number to double in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
@@ -76186,7 +76186,7 @@ IFDEF HAVE_INTEL_AVX2
; /* Triple a Montgomery form number (r = a + a + a % m).
; *
; * r Result of addition.
; * a Number to souble in Montgomery form.
; * a Number to triple in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA