ARM32/Thumb2: generated asm fixes

Fix Thumb2 Curve25519 asm to do full reduce.
Change ARM32 to simpler carry/overflow processing.
Minor optimizations: use ubfx for bit-field extraction, read the source register directly instead of first moving it into a temporary, and cache a value in a register instead of loading it again later.
Reduce the register push and pops in Thumb2 generated code.
Fix Thumb2 to have values less than 64 in decimal.
This commit is contained in:
Sean Parkinson
2026-06-18 11:20:52 +10:00
parent 18c9684c9d
commit 5956da84c2
24 changed files with 4971 additions and 4766 deletions
+170 -59
View File
@@ -8367,37 +8367,36 @@ L_AES_set_encrypt_key_loop_256:
add r2, r2, #16
stm r2, {r4, r5, r6, r7}
sub r2, r2, #16
mov r3, r7
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
lsl r4, r3, #16
lsl r4, r7, #16
lsr r4, r4, #24
#else
uxtb r4, r3, ror #8
uxtb r4, r7, ror #8
#endif
#else
ubfx r4, r3, #8, #8
ubfx r4, r7, #8, #8
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
lsl r5, r3, #8
lsl r5, r7, #8
lsr r5, r5, #24
#else
uxtb r5, r3, ror #16
uxtb r5, r7, ror #16
#endif
#else
ubfx r5, r3, #16, #8
ubfx r5, r7, #16, #8
#endif
lsr r6, r3, #24
lsr r6, r7, #24
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
lsl r3, r3, #24
lsl r3, r7, #24
lsr r3, r3, #24
#else
uxtb r3, r3
uxtb r3, r7
#endif
#else
ubfx r3, r3, #0, #8
ubfx r3, r7, #0, #8
#endif
ldrb r4, [r8, r4, lsl #2]
ldrb r6, [r8, r6, lsl #2]
@@ -23252,14 +23251,22 @@ L_GCM_gmult_len_start_block:
ldr r12, [r0, #12]
ldr r3, [r2, #12]
eor r12, r12, r3
lsr r3, r12, #24
and r3, r3, #15
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r3, r12, #4
lsr r3, r3, #28
#else
ubfx r3, r12, #24, #4
#endif
add r3, r1, r3, lsl #4
ldm r3, {r8, r9, r10, r11}
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsr r4, r12, #28
#else
ubfx r4, r12, #28, #4
#endif
eor r11, r11, r10, lsl #28
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
@@ -23275,9 +23282,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #16
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #12
lsr r4, r4, #28
#else
ubfx r4, r12, #16, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23292,9 +23303,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #20
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #8
lsr r4, r4, #28
#else
ubfx r4, r12, #20, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23309,9 +23324,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #20
lsr r4, r4, #28
#else
ubfx r4, r12, #8, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23326,9 +23345,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #12
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #16
lsr r4, r4, #28
#else
ubfx r4, r12, #12, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23359,9 +23382,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #24
lsr r4, r4, #28
#else
ubfx r4, r12, #4, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23385,8 +23412,12 @@ L_GCM_gmult_len_start_block:
ldr r12, [r0, #8]
ldr r3, [r2, #8]
eor r12, r12, r3
lsr r3, r12, #24
and r3, r3, #15
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r3, r12, #4
lsr r3, r3, #28
#else
ubfx r3, r12, #24, #4
#endif
add r3, r1, r3, lsl #4
ldm r3, {r4, r5, r6, r7}
eor r8, r8, r4
@@ -23396,7 +23427,11 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsr r4, r12, #28
#else
ubfx r4, r12, #28, #4
#endif
eor r11, r11, r10, lsl #28
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
@@ -23412,9 +23447,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #16
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #12
lsr r4, r4, #28
#else
ubfx r4, r12, #16, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23429,9 +23468,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #20
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #8
lsr r4, r4, #28
#else
ubfx r4, r12, #20, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23446,9 +23489,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #20
lsr r4, r4, #28
#else
ubfx r4, r12, #8, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23463,9 +23510,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #12
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #16
lsr r4, r4, #28
#else
ubfx r4, r12, #12, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23496,9 +23547,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #24
lsr r4, r4, #28
#else
ubfx r4, r12, #4, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23522,8 +23577,12 @@ L_GCM_gmult_len_start_block:
ldr r12, [r0, #4]
ldr r3, [r2, #4]
eor r12, r12, r3
lsr r3, r12, #24
and r3, r3, #15
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r3, r12, #4
lsr r3, r3, #28
#else
ubfx r3, r12, #24, #4
#endif
add r3, r1, r3, lsl #4
ldm r3, {r4, r5, r6, r7}
eor r8, r8, r4
@@ -23533,7 +23592,11 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsr r4, r12, #28
#else
ubfx r4, r12, #28, #4
#endif
eor r11, r11, r10, lsl #28
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
@@ -23549,9 +23612,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #16
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #12
lsr r4, r4, #28
#else
ubfx r4, r12, #16, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23566,9 +23633,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #20
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #8
lsr r4, r4, #28
#else
ubfx r4, r12, #20, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23583,9 +23654,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #20
lsr r4, r4, #28
#else
ubfx r4, r12, #8, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23600,9 +23675,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #12
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #16
lsr r4, r4, #28
#else
ubfx r4, r12, #12, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23633,9 +23712,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #24
lsr r4, r4, #28
#else
ubfx r4, r12, #4, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23659,8 +23742,12 @@ L_GCM_gmult_len_start_block:
ldr r12, [r0]
ldr r3, [r2]
eor r12, r12, r3
lsr r3, r12, #24
and r3, r3, #15
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r3, r12, #4
lsr r3, r3, #28
#else
ubfx r3, r12, #24, #4
#endif
add r3, r1, r3, lsl #4
ldm r3, {r4, r5, r6, r7}
eor r8, r8, r4
@@ -23670,7 +23757,11 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsr r4, r12, #28
#else
ubfx r4, r12, #28, #4
#endif
eor r11, r11, r10, lsl #28
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
@@ -23686,9 +23777,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #16
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #12
lsr r4, r4, #28
#else
ubfx r4, r12, #16, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23703,9 +23798,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #20
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #8
lsr r4, r4, #28
#else
ubfx r4, r12, #20, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23720,9 +23819,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #20
lsr r4, r4, #28
#else
ubfx r4, r12, #8, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23737,9 +23840,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #12
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #16
lsr r4, r4, #28
#else
ubfx r4, r12, #12, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23770,9 +23877,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #24
lsr r4, r4, #28
#else
ubfx r4, r12, #4, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
+170 -59
View File
@@ -8993,37 +8993,36 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key,
"add %[ks], %[ks], #16\n\t"
"stm %[ks], {r4, r5, r6, r7}\n\t"
"sub %[ks], %[ks], #16\n\t"
"mov r3, r7\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
"lsl r4, r3, #16\n\t"
"lsl r4, r7, #16\n\t"
"lsr r4, r4, #24\n\t"
#else
"uxtb r4, r3, ror #8\n\t"
"uxtb r4, r7, ror #8\n\t"
#endif
#else
"ubfx r4, r3, #8, #8\n\t"
"ubfx r4, r7, #8, #8\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
"lsl r5, r3, #8\n\t"
"lsl r5, r7, #8\n\t"
"lsr r5, r5, #24\n\t"
#else
"uxtb r5, r3, ror #16\n\t"
"uxtb r5, r7, ror #16\n\t"
#endif
#else
"ubfx r5, r3, #16, #8\n\t"
"ubfx r5, r7, #16, #8\n\t"
#endif
"lsr r6, r3, #24\n\t"
"lsr r6, r7, #24\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
"lsl r3, r3, #24\n\t"
"lsl r3, r7, #24\n\t"
"lsr r3, r3, #24\n\t"
#else
"uxtb r3, r3\n\t"
"uxtb r3, r7\n\t"
#endif
#else
"ubfx r3, r3, #0, #8\n\t"
"ubfx r3, r7, #0, #8\n\t"
#endif
"ldrb r4, [r8, r4, lsl #2]\n\t"
"ldrb r6, [r8, r6, lsl #2]\n\t"
@@ -24111,14 +24110,22 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"ldr r12, [r0, #12]\n\t"
"ldr %[len], [r2, #12]\n\t"
"eor r12, r12, %[len]\n\t"
"lsr %[len], r12, #24\n\t"
"and %[len], %[len], #15\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl %[len], r12, #4\n\t"
"lsr %[len], %[len], #28\n\t"
#else
"ubfx %[len], r12, #24, #4\n\t"
#endif
"add %[len], %[m], %[len], lsl #4\n\t"
"ldm %[len], {r8, r9, r10, r11}\n\t"
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsr r4, r12, #28\n\t"
#else
"ubfx r4, r12, #28, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
@@ -24134,9 +24141,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #16\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #12\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #16, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24151,9 +24162,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #20\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #8\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #20, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24168,9 +24183,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #20\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #8, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24185,9 +24204,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #12\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #16\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #12, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24218,9 +24241,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #24\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #4, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24244,8 +24271,12 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"ldr r12, [r0, #8]\n\t"
"ldr %[len], [r2, #8]\n\t"
"eor r12, r12, %[len]\n\t"
"lsr %[len], r12, #24\n\t"
"and %[len], %[len], #15\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl %[len], r12, #4\n\t"
"lsr %[len], %[len], #28\n\t"
#else
"ubfx %[len], r12, #24, #4\n\t"
#endif
"add %[len], %[m], %[len], lsl #4\n\t"
"ldm %[len], {r4, r5, r6, r7}\n\t"
"eor r8, r8, r4\n\t"
@@ -24255,7 +24286,11 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsr r4, r12, #28\n\t"
#else
"ubfx r4, r12, #28, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
@@ -24271,9 +24306,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #16\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #12\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #16, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24288,9 +24327,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #20\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #8\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #20, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24305,9 +24348,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #20\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #8, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24322,9 +24369,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #12\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #16\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #12, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24355,9 +24406,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #24\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #4, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24381,8 +24436,12 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"ldr r12, [r0, #4]\n\t"
"ldr %[len], [r2, #4]\n\t"
"eor r12, r12, %[len]\n\t"
"lsr %[len], r12, #24\n\t"
"and %[len], %[len], #15\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl %[len], r12, #4\n\t"
"lsr %[len], %[len], #28\n\t"
#else
"ubfx %[len], r12, #24, #4\n\t"
#endif
"add %[len], %[m], %[len], lsl #4\n\t"
"ldm %[len], {r4, r5, r6, r7}\n\t"
"eor r8, r8, r4\n\t"
@@ -24392,7 +24451,11 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsr r4, r12, #28\n\t"
#else
"ubfx r4, r12, #28, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
@@ -24408,9 +24471,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #16\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #12\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #16, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24425,9 +24492,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #20\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #8\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #20, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24442,9 +24513,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #20\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #8, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24459,9 +24534,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #12\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #16\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #12, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24492,9 +24571,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #24\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #4, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24518,8 +24601,12 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"ldr r12, [r0]\n\t"
"ldr %[len], [r2]\n\t"
"eor r12, r12, %[len]\n\t"
"lsr %[len], r12, #24\n\t"
"and %[len], %[len], #15\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl %[len], r12, #4\n\t"
"lsr %[len], %[len], #28\n\t"
#else
"ubfx %[len], r12, #24, #4\n\t"
#endif
"add %[len], %[m], %[len], lsl #4\n\t"
"ldm %[len], {r4, r5, r6, r7}\n\t"
"eor r8, r8, r4\n\t"
@@ -24529,7 +24616,11 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsr r4, r12, #28\n\t"
#else
"ubfx r4, r12, #28, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
@@ -24545,9 +24636,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #16\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #12\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #16, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24562,9 +24657,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #20\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #8\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #20, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24579,9 +24678,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #20\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #8, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24596,9 +24699,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #12\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #16\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #12, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24629,9 +24736,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #24\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #4, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
+11 -23
View File
@@ -155,8 +155,7 @@ fe_add_sub_op:
#endif
# Sub
sbcs r10, r4, r6
sbcs r11, r5, r7
sbc lr, lr, lr
sbc r11, r5, r7
# Add
subs r12, r12, #1
adcs r8, r4, r6
@@ -222,12 +221,9 @@ fe_add_sub_op:
#else
strd r8, r9, [r0, #24]
#endif
# Multiply -modulus by underflow
lsl r3, lr, #1
mvn lr, #18
orr r3, r3, r11, lsr #31
mul lr, r3, lr
# Sub -x*modulus (if overflow)
# Add -modulus on underflow
mov lr, #19
and lr, lr, r11, asr #31
ldm r1, {r4, r5, r6, r7, r8, r9}
subs r4, r4, lr
sbcs r5, r5, #0
@@ -263,12 +259,9 @@ fe_sub_op:
sbcs r10, r2, r10
sbcs r11, r3, r11
sbcs r12, r4, r12
sbcs lr, r5, lr
sbc r3, r3, r3
mvn r2, #18
lsl r3, r3, #1
orr r3, r3, lr, lsr #31
mul r2, r3, r2
sbc lr, r5, lr
mov r2, #19
and r2, r2, lr, asr #31
subs r6, r6, r2
sbcs r7, r7, #0
sbcs r8, r8, #0
@@ -312,13 +305,9 @@ fe_add_op:
adcs r10, r2, r10
adcs r11, r3, r11
adcs r12, r4, r12
mov r3, #0
adcs lr, r5, lr
adc r3, r3, #0
adc lr, r5, lr
mov r2, #19
lsl r3, r3, #1
orr r3, r3, lr, lsr #31
mul r2, r3, r2
and r2, r2, lr, asr #31
adds r6, r6, r2
adcs r7, r7, #0
adcs r8, r8, #0
@@ -575,6 +564,7 @@ fe_isnonzero:
fe_isnegative:
push {r4, r5, lr}
ldm r0!, {r2, r3, r4, r5}
and r12, r2, #1
adds r1, r2, #19
adcs r1, r3, #0
adcs r1, r4, #0
@@ -583,11 +573,9 @@ fe_isnegative:
adcs r1, r2, #0
adcs r1, r3, #0
adcs r1, r4, #0
ldr r2, [r0, #-16]
adc r1, r5, #0
and r0, r2, #1
lsr r1, r1, #31
eor r0, r0, r1
eor r0, r12, r1
pop {r4, r5, pc}
.size fe_isnegative,.-fe_isnegative
#if defined(HAVE_ED25519_MAKE_KEY) || defined(HAVE_ED25519_SIGN) || defined(WOLFSSL_CURVE25519_USE_ED25519)
+28 -40
View File
@@ -59,9 +59,9 @@
#if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL)
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_init(void)
WC_OMIT_FRAME_POINTER void fe_init()
#else
WC_OMIT_FRAME_POINTER void fe_init(void)
WC_OMIT_FRAME_POINTER void fe_init()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
@@ -81,9 +81,9 @@ WC_OMIT_FRAME_POINTER void fe_init(void)
void fe_add_sub_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_add_sub_op(void)
WC_OMIT_FRAME_POINTER void fe_add_sub_op()
#else
WC_OMIT_FRAME_POINTER void fe_add_sub_op(void)
WC_OMIT_FRAME_POINTER void fe_add_sub_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
@@ -198,8 +198,7 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op(void)
#endif
/* Sub */
"sbcs r10, r4, r6\n\t"
"sbcs r11, r5, r7\n\t"
"sbc lr, lr, lr\n\t"
"sbc r11, r5, r7\n\t"
/* Add */
"subs r12, r12, #1\n\t"
"adcs r8, r4, r6\n\t"
@@ -265,12 +264,9 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op(void)
#else
"strd r8, r9, [r0, #24]\n\t"
#endif
/* Multiply -modulus by underflow */
"lsl r3, lr, #1\n\t"
"mvn lr, #18\n\t"
"orr r3, r3, r11, lsr #31\n\t"
"mul lr, r3, lr\n\t"
/* Sub -x*modulus (if overflow) */
/* Add -modulus on underflow */
"mov lr, #19\n\t"
"and lr, lr, r11, asr #31\n\t"
"ldm r1, {r4, r5, r6, r7, r8, r9}\n\t"
"subs r4, r4, lr\n\t"
"sbcs r5, r5, #0\n\t"
@@ -300,9 +296,9 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op(void)
void fe_sub_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_sub_op(void)
WC_OMIT_FRAME_POINTER void fe_sub_op()
#else
WC_OMIT_FRAME_POINTER void fe_sub_op(void)
WC_OMIT_FRAME_POINTER void fe_sub_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
@@ -319,12 +315,9 @@ WC_OMIT_FRAME_POINTER void fe_sub_op(void)
"sbcs r10, r2, r10\n\t"
"sbcs r11, r3, r11\n\t"
"sbcs r12, r4, r12\n\t"
"sbcs lr, r5, lr\n\t"
"sbc r3, r3, r3\n\t"
"mvn r2, #18\n\t"
"lsl r3, r3, #1\n\t"
"orr r3, r3, lr, lsr #31\n\t"
"mul r2, r3, r2\n\t"
"sbc lr, r5, lr\n\t"
"mov r2, #19\n\t"
"and r2, r2, lr, asr #31\n\t"
"subs r6, r6, r2\n\t"
"sbcs r7, r7, #0\n\t"
"sbcs r8, r8, #0\n\t"
@@ -379,9 +372,9 @@ WC_OMIT_FRAME_POINTER void fe_sub(fe r, const fe a, const fe b)
void fe_add_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_add_op(void)
WC_OMIT_FRAME_POINTER void fe_add_op()
#else
WC_OMIT_FRAME_POINTER void fe_add_op(void)
WC_OMIT_FRAME_POINTER void fe_add_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
@@ -398,13 +391,9 @@ WC_OMIT_FRAME_POINTER void fe_add_op(void)
"adcs r10, r2, r10\n\t"
"adcs r11, r3, r11\n\t"
"adcs r12, r4, r12\n\t"
"mov r3, #0\n\t"
"adcs lr, r5, lr\n\t"
"adc r3, r3, #0\n\t"
"adc lr, r5, lr\n\t"
"mov r2, #19\n\t"
"lsl r3, r3, #1\n\t"
"orr r3, r3, lr, lsr #31\n\t"
"mul r2, r3, r2\n\t"
"and r2, r2, lr, asr #31\n\t"
"adds r6, r6, r2\n\t"
"adcs r7, r7, #0\n\t"
"adcs r8, r8, #0\n\t"
@@ -797,6 +786,7 @@ WC_OMIT_FRAME_POINTER int fe_isnegative(const fe a)
__asm__ __volatile__ (
"ldm %[a]!, {r2, r3, r4, r5}\n\t"
"and r12, r2, #1\n\t"
"adds r1, r2, #19\n\t"
"adcs r1, r3, #0\n\t"
"adcs r1, r4, #0\n\t"
@@ -805,11 +795,9 @@ WC_OMIT_FRAME_POINTER int fe_isnegative(const fe a)
"adcs r1, r2, #0\n\t"
"adcs r1, r3, #0\n\t"
"adcs r1, r4, #0\n\t"
"ldr r2, [%[a], #-16]\n\t"
"adc r1, r5, #0\n\t"
"and %[a], r2, #1\n\t"
"lsr r1, r1, #31\n\t"
"eor %[a], %[a], r1\n\t"
"eor %[a], r12, r1\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [a] "+r" (a)
:
@@ -817,7 +805,7 @@ WC_OMIT_FRAME_POINTER int fe_isnegative(const fe a)
:
: [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r1", "r2", "r3", "r4", "r5"
: "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r12"
);
return (word32)(size_t)a;
}
@@ -2510,9 +2498,9 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
void fe_mul_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_mul_op(void)
WC_OMIT_FRAME_POINTER void fe_mul_op()
#else
WC_OMIT_FRAME_POINTER void fe_mul_op(void)
WC_OMIT_FRAME_POINTER void fe_mul_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
@@ -2905,9 +2893,9 @@ WC_OMIT_FRAME_POINTER void fe_mul_op(void)
#else
void fe_mul_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_mul_op(void)
WC_OMIT_FRAME_POINTER void fe_mul_op()
#else
WC_OMIT_FRAME_POINTER void fe_mul_op(void)
WC_OMIT_FRAME_POINTER void fe_mul_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
@@ -3086,9 +3074,9 @@ WC_OMIT_FRAME_POINTER void fe_mul(fe r, const fe a, const fe b)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
void fe_sq_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_sq_op(void)
WC_OMIT_FRAME_POINTER void fe_sq_op()
#else
WC_OMIT_FRAME_POINTER void fe_sq_op(void)
WC_OMIT_FRAME_POINTER void fe_sq_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
@@ -3374,9 +3362,9 @@ WC_OMIT_FRAME_POINTER void fe_sq_op(void)
#else
void fe_sq_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_sq_op(void)
WC_OMIT_FRAME_POINTER void fe_sq_op()
#else
WC_OMIT_FRAME_POINTER void fe_sq_op(void)
WC_OMIT_FRAME_POINTER void fe_sq_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+38 -46
View File
@@ -5736,7 +5736,7 @@ L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done:
eor x14, x14, x14
ubfiz x24, x4, #3, #32
lsl x24, x4, #3
mov v28.d[0], x14
mov v28.d[1], x24
rev64 v28.16b, v28.16b
@@ -7099,10 +7099,10 @@ L_aes_gcm_encrypt_arm64_crypto_192_start_zero:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_192_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -8637,10 +8637,10 @@ L_aes_gcm_encrypt_arm64_crypto_256_start_zero:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_256_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
aese v14.16b, v0.16b
@@ -9944,10 +9944,10 @@ L_aes_gcm_encrypt_arm64_crypto_128_start_zero:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_128_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -10537,7 +10537,7 @@ L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes:
# Done GHASH
L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done:
eor x14, x14, x14
ubfiz x24, x4, #3, #32
lsl x24, x4, #3
mov v28.d[0], x14
mov v28.d[1], x24
rev64 v28.16b, v28.16b
@@ -11893,10 +11893,10 @@ L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte:
L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes:
L_aes_gcm_decrypt_arm64_crypto_192_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -11945,7 +11945,6 @@ L_aes_gcm_decrypt_arm64_crypto_192_partial_done:
ld1 {v28.16b}, [x5]
b L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded
L_aes_gcm_decrypt_arm64_crypto_192_part_tag:
ubfiz x6, x6, #0, #32
eor v28.16b, v28.16b, v28.16b
mov x17, x6
st1 {v28.2d}, [x11]
@@ -13447,10 +13446,10 @@ L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte:
L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes:
L_aes_gcm_decrypt_arm64_crypto_256_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
aese v14.16b, v0.16b
@@ -13507,7 +13506,6 @@ L_aes_gcm_decrypt_arm64_crypto_256_partial_done:
ld1 {v28.16b}, [x5]
b L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded
L_aes_gcm_decrypt_arm64_crypto_256_part_tag:
ubfiz x6, x6, #0, #32
eor v28.16b, v28.16b, v28.16b
mov x17, x6
st1 {v28.2d}, [x11]
@@ -14770,10 +14768,10 @@ L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte:
L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes:
L_aes_gcm_decrypt_arm64_crypto_128_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -14818,7 +14816,6 @@ L_aes_gcm_decrypt_arm64_crypto_128_partial_done:
ld1 {v28.16b}, [x5]
b L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded
L_aes_gcm_decrypt_arm64_crypto_128_part_tag:
ubfiz x6, x6, #0, #32
eor v28.16b, v28.16b, v28.16b
mov x17, x6
st1 {v28.2d}, [x11]
@@ -15367,7 +15364,7 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done:
eor x14, x14, x14
ubfiz x24, x4, #3, #32
lsl x24, x4, #3
mov v28.d[0], x14
mov v28.d[1], x24
rev64 v28.16b, v28.16b
@@ -16701,10 +16698,10 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -18210,10 +18207,10 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
aese v14.16b, v0.16b
@@ -19488,10 +19485,10 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -20059,7 +20056,7 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes:
# Done GHASH
L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done:
eor x14, x14, x14
ubfiz x24, x4, #3, #32
lsl x24, x4, #3
mov v28.d[0], x14
mov v28.d[1], x24
rev64 v28.16b, v28.16b
@@ -21386,10 +21383,10 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte:
L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes:
L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -21437,7 +21434,6 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done:
ld1 {v28.16b}, [x5]
b L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded
L_aes_gcm_decrypt_arm64_crypto_eor3_192_part_tag:
ubfiz x6, x6, #0, #32
eor v28.16b, v28.16b, v28.16b
mov x17, x6
st1 {v28.2d}, [x11]
@@ -22911,10 +22907,10 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte:
L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes:
L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
aese v14.16b, v0.16b
@@ -22970,7 +22966,6 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done:
ld1 {v28.16b}, [x5]
b L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded
L_aes_gcm_decrypt_arm64_crypto_eor3_256_part_tag:
ubfiz x6, x6, #0, #32
eor v28.16b, v28.16b, v28.16b
mov x17, x6
st1 {v28.2d}, [x11]
@@ -24205,10 +24200,10 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte:
L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes:
L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -24252,7 +24247,6 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done:
ld1 {v28.16b}, [x5]
b L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded
L_aes_gcm_decrypt_arm64_crypto_eor3_128_part_tag:
ubfiz x6, x6, #0, #32
eor v28.16b, v28.16b, v28.16b
mov x17, x6
st1 {v28.2d}, [x11]
@@ -24426,7 +24420,7 @@ L_aes_gcm_init_arm64_crypto_end_bytes:
# Done GHASH
L_aes_gcm_init_arm64_crypto_partial_done:
eor x7, x7, x7
ubfiz x13, x3, #3, #32
lsl x13, x3, #3
mov v7.d[0], x7
mov v7.d[1], x13
rev64 v7.16b, v7.16b
@@ -28788,10 +28782,10 @@ _AES_GCM_encrypt_final_AARCH64:
ld1 {v4.2d}, [x5]
ushr v6.2d, v6.2d, #56
ld1 {v7.2d}, [x6]
ubfiz x4, x4, #3, #32
lsl x4, x4, #3
rbit x4, x4
mov v0.d[0], x4
ubfiz x3, x3, #3, #32
lsl x3, x3, #3
rbit x3, x3
mov v0.d[1], x3
eor v5.16b, v5.16b, v0.16b
@@ -32674,10 +32668,10 @@ _AES_GCM_decrypt_final_AARCH64:
ld1 {v4.2d}, [x5]
ushr v6.2d, v6.2d, #56
ld1 {v7.2d}, [x6]
ubfiz x4, x4, #3, #32
lsl x4, x4, #3
rbit x4, x4
mov v0.d[0], x4
ubfiz x3, x3, #3, #32
lsl x3, x3, #3
rbit x3, x3
mov v0.d[1], x3
eor v5.16b, v5.16b, v0.16b
@@ -32702,7 +32696,6 @@ _AES_GCM_decrypt_final_AARCH64:
ld1 {v0.16b}, [x1]
b L_aes_gcm_decrypt_final_arm64_crypto_tag_loaded
L_aes_gcm_decrypt_final_arm64_crypto_part_tag:
ubfiz x2, x2, #0, #32
eor v0.16b, v0.16b, v0.16b
mov x10, x2
st1 {v0.2d}, [x0]
@@ -32863,7 +32856,7 @@ L_aes_gcm_init_arm64_crypto_eor3_end_bytes:
# Done GHASH
L_aes_gcm_init_arm64_crypto_eor3_partial_done:
eor x7, x7, x7
ubfiz x13, x3, #3, #32
lsl x13, x3, #3
mov v7.d[0], x7
mov v7.d[1], x13
rev64 v7.16b, v7.16b
@@ -37121,10 +37114,10 @@ _AES_GCM_encrypt_final_AARCH64_EOR3:
ld1 {v4.2d}, [x5]
ushr v6.2d, v6.2d, #56
ld1 {v7.2d}, [x6]
ubfiz x4, x4, #3, #32
lsl x4, x4, #3
rbit x4, x4
mov v0.d[0], x4
ubfiz x3, x3, #3, #32
lsl x3, x3, #3
rbit x3, x3
mov v0.d[1], x3
eor v5.16b, v5.16b, v0.16b
@@ -40922,10 +40915,10 @@ _AES_GCM_decrypt_final_AARCH64_EOR3:
ld1 {v4.2d}, [x5]
ushr v6.2d, v6.2d, #56
ld1 {v7.2d}, [x6]
ubfiz x4, x4, #3, #32
lsl x4, x4, #3
rbit x4, x4
mov v0.d[0], x4
ubfiz x3, x3, #3, #32
lsl x3, x3, #3
rbit x3, x3
mov v0.d[1], x3
eor v5.16b, v5.16b, v0.16b
@@ -40949,7 +40942,6 @@ _AES_GCM_decrypt_final_AARCH64_EOR3:
ld1 {v0.16b}, [x1]
b L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_loaded
L_aes_gcm_decrypt_final_arm64_crypto_eor3_part_tag:
ubfiz x2, x2, #0, #32
eor v0.16b, v0.16b, v0.16b
mov x10, x2
st1 {v0.2d}, [x0]
File diff suppressed because it is too large Load Diff
+5 -5
View File
@@ -38,7 +38,7 @@
#if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL)
#include <wolfssl/wolfcrypt/fe_operations.h>
void fe_init(void)
void fe_init()
{
__asm__ __volatile__ (
"\n\t"
@@ -229,8 +229,8 @@ int fe_isnonzero(const fe a)
"orr %x[a], x1, x2\n\t"
"orr x3, x3, x4\n\t"
"orr %x[a], %x[a], x3\n\t"
: [a] "+r" (a)
:
: [a] "r" (a)
: "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6"
);
return (word32)(size_t)a;
@@ -248,8 +248,8 @@ int fe_isnegative(const fe a)
"adc x5, x4, xzr\n\t"
"and %x[a], x1, #1\n\t"
"eor %x[a], %x[a], x5, lsr 63\n\t"
: [a] "+r" (a)
:
: [a] "r" (a)
: "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6"
);
return (word32)(size_t)a;
@@ -4362,7 +4362,7 @@ int curve25519_base(byte* r, const byte* n)
/* Store */
"stp x14, x15, [%x[r]]\n\t"
"stp x16, x17, [%x[r], #16]\n\t"
"mov %x[r], xzr\n\t"
"mov x0, xzr\n\t"
"ldp x29, x30, [sp], #0xb0\n\t"
: [r] "+r" (r)
: [n] "r" (n), [x2] "r" (x2)
@@ -6969,7 +6969,7 @@ int curve25519(byte* r, const byte* n, const byte* a)
/* Store */
"stp x14, x15, [%x[r]]\n\t"
"stp x16, x17, [%x[r], #16]\n\t"
"mov %x[r], xzr\n\t"
"mov x0, xzr\n\t"
"ldp x29, x30, [sp], #0xc0\n\t"
: [r] "+r" (r)
: [n] "r" (n), [a] "r" (a)
+6 -6
View File
@@ -8406,11 +8406,11 @@ int mlkem_cmp_neon(const byte* a, const byte* b, int sz)
"orr v8.16b, v8.16b, v10.16b\n\t"
"ext v9.16b, v8.16b, v8.16b, #8\n\t"
"orr v8.16b, v8.16b, v9.16b\n\t"
"mov %x[a], v8.d[0]\n\t"
"subs %x[a], %x[a], xzr\n\t"
"csetm %w[a], ne\n\t"
: [a] "+r" (a), [sz] "+r" (sz)
: [b] "r" (b)
"mov x0, v8.d[0]\n\t"
"subs x0, x0, xzr\n\t"
"csetm w0, ne\n\t"
: [sz] "+r" (sz)
: [a] "r" (a), [b] "r" (b)
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
"v9", "v10", "v11"
);
@@ -9089,7 +9089,7 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r,
"b L_mlkem_rej_uniform_loop_lt_4_%=\n\t"
"\n"
"L_mlkem_rej_uniform_done_%=:\n\t"
"mov %x[p], x12\n\t"
"mov x0, x12\n\t"
: [p] "+r" (p), [len] "+r" (len), [rLen] "+r" (rLen)
: [r] "r" (r), [mask] "r" (mask), [q] "r" (q), [bits] "r" (bits),
[indices] "r" (indices)
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+33 -33
View File
@@ -39,7 +39,7 @@
.type wc_chacha_setiv, %function
wc_chacha_setiv:
PUSH {r4, r5, r6, lr}
ADD r3, r0, #0x34
ADD r3, r0, #52
LDR r4, [r1]
LDR r5, [r1, #4]
LDR r6, [r1, #8]
@@ -76,7 +76,7 @@ L_chacha_thumb2_constants:
wc_chacha_setkey:
PUSH {r4, r5, r6, r7, lr}
ADR r7, L_chacha_thumb2_constants
SUBS r2, r2, #0x10
SUBS r2, r2, #16
ADD r7, r7, r2
/* Start state with constants */
LDM r7, {r3, r4, r5, r6}
@@ -116,7 +116,7 @@ L_chacha_thumb2_setkey_same_key_bytes:
.type wc_chacha_crypt_bytes, %function
wc_chacha_crypt_bytes:
PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
SUB sp, sp, #0x34
SUB sp, sp, #52
MOV lr, r0
STRD r0, r1, [sp, #32]
STRD r2, r3, [sp, #40]
@@ -129,7 +129,7 @@ L_chacha_thumb2_crypt_block:
/* Load x[0]..x[12] into registers. */
LDM lr, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12}
/* 10x 2 full rounds to perform. */
MOV lr, #0xa
MOV lr, #10
STR lr, [sp, #48]
L_chacha_thumb2_crypt_loop:
/* 0, 4, 8, 12 */
@@ -248,7 +248,7 @@ L_chacha_thumb2_crypt_loop:
STR lr, [sp, #20]
/* Check if we have done enough rounds. */
LDR lr, [sp, #48]
SUBS lr, lr, #0x1
SUBS lr, lr, #1
STR lr, [sp, #48]
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BGT L_chacha_thumb2_crypt_loop
@@ -283,7 +283,7 @@ L_chacha_thumb2_crypt_loop:
LDM lr!, {r10, r11}
ADD r8, r8, r10
ADD r9, r9, r11
ADD r10, r10, #0x1
ADD r10, r10, #1
STM r12!, {r8, r9}
STR r10, [lr, #-8]
LDM r12, {r8, r9}
@@ -388,7 +388,7 @@ L_chacha_thumb2_crypt_lt_block:
STR r12, [lr, #64]
ADD lr, lr, #0x44
L_chacha_thumb2_crypt_16byte_loop:
CMP r3, #0x10
CMP r3, #16
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BLT L_chacha_thumb2_crypt_word_loop
#else
@@ -404,7 +404,7 @@ L_chacha_thumb2_crypt_16byte_loop:
EOR r9, r9, r5
EOR r10, r10, r6
EOR r11, r11, r7
SUBS r3, r3, #0x10
SUBS r3, r3, #16
STR r8, [r1]
STR r9, [r1, #4]
STR r10, [r1, #8]
@@ -414,15 +414,15 @@ L_chacha_thumb2_crypt_16byte_loop:
#else
BEQ.N L_chacha_thumb2_crypt_done
#endif
ADD r2, r2, #0x10
ADD r1, r1, #0x10
ADD r2, r2, #16
ADD r1, r1, #16
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
B L_chacha_thumb2_crypt_16byte_loop
#else
B.N L_chacha_thumb2_crypt_16byte_loop
#endif
L_chacha_thumb2_crypt_word_loop:
CMP r3, #0x4
CMP r3, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BLT L_chacha_thumb2_crypt_byte_start
#else
@@ -432,16 +432,16 @@ L_chacha_thumb2_crypt_word_loop:
LDR r4, [lr]
LDR r8, [r2]
EOR r8, r8, r4
SUBS r3, r3, #0x4
SUBS r3, r3, #4
STR r8, [r1]
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_chacha_thumb2_crypt_done
#else
BEQ.N L_chacha_thumb2_crypt_done
#endif
ADD lr, lr, #0x4
ADD r2, r2, #0x4
ADD r1, r1, #0x4
ADD lr, lr, #4
ADD r2, r2, #4
ADD r1, r1, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
B L_chacha_thumb2_crypt_word_loop
#else
@@ -452,7 +452,7 @@ L_chacha_thumb2_crypt_byte_start:
L_chacha_thumb2_crypt_byte_loop:
LDRB r8, [r2]
EOR r8, r8, r4
SUBS r3, r3, #0x1
SUBS r3, r3, #1
STRB r8, [r1]
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_chacha_thumb2_crypt_done
@@ -460,15 +460,15 @@ L_chacha_thumb2_crypt_byte_loop:
BEQ.N L_chacha_thumb2_crypt_done
#endif
LSR r4, r4, #8
ADD r2, r2, #0x1
ADD r1, r1, #0x1
ADD r2, r2, #1
ADD r1, r1, #1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
B L_chacha_thumb2_crypt_byte_loop
#else
B.N L_chacha_thumb2_crypt_byte_loop
#endif
L_chacha_thumb2_crypt_done:
ADD sp, sp, #0x34
ADD sp, sp, #52
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 508 */
.size wc_chacha_crypt_bytes,.-wc_chacha_crypt_bytes
@@ -479,7 +479,7 @@ L_chacha_thumb2_crypt_done:
wc_chacha_use_over:
PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
L_chacha_thumb2_over_16byte_loop:
CMP r3, #0x10
CMP r3, #16
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BLT L_chacha_thumb2_over_word_loop
#else
@@ -498,7 +498,7 @@ L_chacha_thumb2_over_16byte_loop:
EOR r5, r5, r9
EOR r6, r6, r10
EOR r7, r7, r11
SUBS r3, r3, #0x10
SUBS r3, r3, #16
STR r4, [r1]
STR r5, [r1, #4]
STR r6, [r1, #8]
@@ -508,16 +508,16 @@ L_chacha_thumb2_over_16byte_loop:
#else
BEQ.N L_chacha_thumb2_over_done
#endif
ADD r0, r0, #0x10
ADD r2, r2, #0x10
ADD r1, r1, #0x10
ADD r0, r0, #16
ADD r2, r2, #16
ADD r1, r1, #16
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
B L_chacha_thumb2_over_16byte_loop
#else
B.N L_chacha_thumb2_over_16byte_loop
#endif
L_chacha_thumb2_over_word_loop:
CMP r3, #0x4
CMP r3, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BLT L_chacha_thumb2_over_byte_loop
#else
@@ -527,16 +527,16 @@ L_chacha_thumb2_over_word_loop:
LDR r4, [r0]
LDR r8, [r2]
EOR r4, r4, r8
SUBS r3, r3, #0x4
SUBS r3, r3, #4
STR r4, [r1]
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_chacha_thumb2_over_done
#else
BEQ.N L_chacha_thumb2_over_done
#endif
ADD r0, r0, #0x4
ADD r2, r2, #0x4
ADD r1, r1, #0x4
ADD r0, r0, #4
ADD r2, r2, #4
ADD r1, r1, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
B L_chacha_thumb2_over_word_loop
#else
@@ -547,16 +547,16 @@ L_chacha_thumb2_over_byte_loop:
LDRB r4, [r0]
LDRB r8, [r2]
EOR r4, r4, r8
SUBS r3, r3, #0x1
SUBS r3, r3, #1
STRB r4, [r1]
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_chacha_thumb2_over_done
#else
BEQ.N L_chacha_thumb2_over_done
#endif
ADD r0, r0, #0x1
ADD r2, r2, #0x1
ADD r1, r1, #0x1
ADD r0, r0, #1
ADD r2, r2, #1
ADD r1, r1, #1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
B L_chacha_thumb2_over_byte_loop
#else
+33 -33
View File
@@ -65,7 +65,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setiv(word32* x, const byte* iv,
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"ADD r3, %[x], #0x34\n\t"
"ADD r3, %[x], #52\n\t"
"LDR r4, [%[iv]]\n\t"
"LDR r5, [%[iv], #4]\n\t"
"LDR r6, [%[iv], #8]\n\t"
@@ -113,7 +113,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key,
__asm__ __volatile__ (
"MOV r7, %[L_chacha_thumb2_constants]\n\t"
"SUBS %[keySz], %[keySz], #0x10\n\t"
"SUBS %[keySz], %[keySz], #16\n\t"
"ADD r7, r7, %[keySz]\n\t"
/* Start state with constants */
"LDM r7, {r3, r4, r5, r6}\n\t"
@@ -180,7 +180,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"SUB sp, sp, #0x34\n\t"
"SUB sp, sp, #52\n\t"
"MOV lr, %[ctx]\n\t"
"STRD %[ctx], %[c], [sp, #32]\n\t"
"STRD %[m], %[len], [sp, #40]\n\t"
@@ -198,7 +198,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
/* Load x[0]..x[12] into registers. */
"LDM lr, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12}\n\t"
/* 10x 2 full rounds to perform. */
"MOV lr, #0xa\n\t"
"MOV lr, #10\n\t"
"STR lr, [sp, #48]\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -322,7 +322,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"STR lr, [sp, #20]\n\t"
/* Check if we have done enough rounds. */
"LDR lr, [sp, #48]\n\t"
"SUBS lr, lr, #0x1\n\t"
"SUBS lr, lr, #1\n\t"
"STR lr, [sp, #48]\n\t"
#if defined(__GNUC__)
"BGT L_chacha_thumb2_crypt_loop_%=\n\t"
@@ -359,7 +359,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"LDM lr!, {r10, r11}\n\t"
"ADD r8, r8, r10\n\t"
"ADD r9, r9, r11\n\t"
"ADD r10, r10, #0x1\n\t"
"ADD r10, r10, #1\n\t"
"STM r12!, {r8, r9}\n\t"
"STR r10, [lr, #-8]\n\t"
"LDM r12, {r8, r9}\n\t"
@@ -480,7 +480,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
#else
"L_chacha_thumb2_crypt_16byte_loop_%=:\n\t"
#endif
"CMP %[len], #0x10\n\t"
"CMP %[len], #16\n\t"
#if defined(__GNUC__)
"BLT L_chacha_thumb2_crypt_word_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -498,7 +498,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"EOR r9, r9, r5\n\t"
"EOR r10, r10, r6\n\t"
"EOR r11, r11, r7\n\t"
"SUBS %[len], %[len], #0x10\n\t"
"SUBS %[len], %[len], #16\n\t"
"STR r8, [%[c]]\n\t"
"STR r9, [%[c], #4]\n\t"
"STR r10, [%[c], #8]\n\t"
@@ -510,8 +510,8 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
#else
"BEQ.N L_chacha_thumb2_crypt_done_%=\n\t"
#endif
"ADD %[m], %[m], #0x10\n\t"
"ADD %[c], %[c], #0x10\n\t"
"ADD %[m], %[m], #16\n\t"
"ADD %[c], %[c], #16\n\t"
#if defined(__GNUC__)
"B L_chacha_thumb2_crypt_16byte_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -525,7 +525,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
#else
"L_chacha_thumb2_crypt_word_loop_%=:\n\t"
#endif
"CMP %[len], #0x4\n\t"
"CMP %[len], #4\n\t"
#if defined(__GNUC__)
"BLT L_chacha_thumb2_crypt_byte_start_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -537,7 +537,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"LDR r4, [lr]\n\t"
"LDR r8, [%[m]]\n\t"
"EOR r8, r8, r4\n\t"
"SUBS %[len], %[len], #0x4\n\t"
"SUBS %[len], %[len], #4\n\t"
"STR r8, [%[c]]\n\t"
#if defined(__GNUC__)
"BEQ L_chacha_thumb2_crypt_done_%=\n\t"
@@ -546,9 +546,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
#else
"BEQ.N L_chacha_thumb2_crypt_done_%=\n\t"
#endif
"ADD lr, lr, #0x4\n\t"
"ADD %[m], %[m], #0x4\n\t"
"ADD %[c], %[c], #0x4\n\t"
"ADD lr, lr, #4\n\t"
"ADD %[m], %[m], #4\n\t"
"ADD %[c], %[c], #4\n\t"
#if defined(__GNUC__)
"B L_chacha_thumb2_crypt_word_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -571,7 +571,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
#endif
"LDRB r8, [%[m]]\n\t"
"EOR r8, r8, r4\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"SUBS %[len], %[len], #1\n\t"
"STRB r8, [%[c]]\n\t"
#if defined(__GNUC__)
"BEQ L_chacha_thumb2_crypt_done_%=\n\t"
@@ -581,8 +581,8 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"BEQ.N L_chacha_thumb2_crypt_done_%=\n\t"
#endif
"LSR r4, r4, #8\n\t"
"ADD %[m], %[m], #0x1\n\t"
"ADD %[c], %[c], #0x1\n\t"
"ADD %[m], %[m], #1\n\t"
"ADD %[c], %[c], #1\n\t"
#if defined(__GNUC__)
"B L_chacha_thumb2_crypt_byte_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -596,7 +596,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
#else
"L_chacha_thumb2_crypt_done_%=:\n\t"
#endif
"ADD sp, sp, #0x34\n\t"
"ADD sp, sp, #52\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ctx] "+r" (ctx), [c] "+r" (c), [m] "+r" (m), [len] "+r" (len)
:
@@ -631,7 +631,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
#else
"L_chacha_thumb2_over_16byte_loop_%=:\n\t"
#endif
"CMP %[len], #0x10\n\t"
"CMP %[len], #16\n\t"
#if defined(__GNUC__)
"BLT L_chacha_thumb2_over_word_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -652,7 +652,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r10\n\t"
"EOR r7, r7, r11\n\t"
"SUBS %[len], %[len], #0x10\n\t"
"SUBS %[len], %[len], #16\n\t"
"STR r4, [%[output]]\n\t"
"STR r5, [%[output], #4]\n\t"
"STR r6, [%[output], #8]\n\t"
@@ -664,9 +664,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
#else
"BEQ.N L_chacha_thumb2_over_done_%=\n\t"
#endif
"ADD %[over], %[over], #0x10\n\t"
"ADD %[input], %[input], #0x10\n\t"
"ADD %[output], %[output], #0x10\n\t"
"ADD %[over], %[over], #16\n\t"
"ADD %[input], %[input], #16\n\t"
"ADD %[output], %[output], #16\n\t"
#if defined(__GNUC__)
"B L_chacha_thumb2_over_16byte_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -680,7 +680,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
#else
"L_chacha_thumb2_over_word_loop_%=:\n\t"
#endif
"CMP %[len], #0x4\n\t"
"CMP %[len], #4\n\t"
#if defined(__GNUC__)
"BLT L_chacha_thumb2_over_byte_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -692,7 +692,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
"LDR r4, [%[over]]\n\t"
"LDR r8, [%[input]]\n\t"
"EOR r4, r4, r8\n\t"
"SUBS %[len], %[len], #0x4\n\t"
"SUBS %[len], %[len], #4\n\t"
"STR r4, [%[output]]\n\t"
#if defined(__GNUC__)
"BEQ L_chacha_thumb2_over_done_%=\n\t"
@@ -701,9 +701,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
#else
"BEQ.N L_chacha_thumb2_over_done_%=\n\t"
#endif
"ADD %[over], %[over], #0x4\n\t"
"ADD %[input], %[input], #0x4\n\t"
"ADD %[output], %[output], #0x4\n\t"
"ADD %[over], %[over], #4\n\t"
"ADD %[input], %[input], #4\n\t"
"ADD %[output], %[output], #4\n\t"
#if defined(__GNUC__)
"B L_chacha_thumb2_over_word_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -721,7 +721,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
"LDRB r4, [%[over]]\n\t"
"LDRB r8, [%[input]]\n\t"
"EOR r4, r4, r8\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"SUBS %[len], %[len], #1\n\t"
"STRB r4, [%[output]]\n\t"
#if defined(__GNUC__)
"BEQ L_chacha_thumb2_over_done_%=\n\t"
@@ -730,9 +730,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
#else
"BEQ.N L_chacha_thumb2_over_done_%=\n\t"
#endif
"ADD %[over], %[over], #0x1\n\t"
"ADD %[input], %[input], #0x1\n\t"
"ADD %[output], %[output], #0x1\n\t"
"ADD %[over], %[over], #1\n\t"
"ADD %[input], %[input], #1\n\t"
"ADD %[output], %[output], #1\n\t"
#if defined(__GNUC__)
"B L_chacha_thumb2_over_byte_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+54 -54
View File
@@ -69,13 +69,13 @@ L_mlkem_thumb2_ntt_zetas:
.type mlkem_thumb2_ntt, %function
mlkem_thumb2_ntt:
PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
SUB sp, sp, #0x8
SUB sp, sp, #8
ADR r1, L_mlkem_thumb2_ntt_zetas
#ifndef WOLFSSL_ARM_ARCH_7M
MOV r12, #0xd01
MOVT r12, #0xcff
#endif /* !WOLFSSL_ARM_ARCH_7M */
MOV r2, #0x10
MOV r2, #16
L_mlkem_thumb2_ntt_loop_123:
STR r2, [sp]
LDRH lr, [r1, #2]
@@ -507,19 +507,19 @@ L_mlkem_thumb2_ntt_loop_123:
STR r8, [r0, #384]
STR r9, [r0, #448]
LDR r2, [sp]
SUBS r2, r2, #0x1
ADD r0, r0, #0x4
SUBS r2, r2, #1
ADD r0, r0, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_thumb2_ntt_loop_123
#else
BNE.N L_mlkem_thumb2_ntt_loop_123
#endif
SUB r0, r0, #0x40
MOV r3, #0x0
MOV r3, #0
L_mlkem_thumb2_ntt_loop_4_j:
STR r3, [sp, #4]
ADD lr, r1, r3, LSR #4
MOV r2, #0x4
MOV r2, #4
LDR lr, [lr, #16]
L_mlkem_thumb2_ntt_loop_4_i:
STR r2, [sp]
@@ -676,8 +676,8 @@ L_mlkem_thumb2_ntt_loop_4_i:
STR r8, [r0, #96]
STR r9, [r0, #112]
LDRD r2, r3, [sp]
SUBS r2, r2, #0x1
ADD r0, r0, #0x4
SUBS r2, r2, #1
ADD r0, r0, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_thumb2_ntt_loop_4_i
#else
@@ -692,7 +692,7 @@ L_mlkem_thumb2_ntt_loop_4_i:
BNE.N L_mlkem_thumb2_ntt_loop_4_j
#endif
SUB r0, r0, #0x200
MOV r3, #0x0
MOV r3, #0
L_mlkem_thumb2_ntt_loop_567:
ADD lr, r1, r3, LSR #3
STR r3, [sp, #4]
@@ -1310,15 +1310,15 @@ L_mlkem_thumb2_ntt_loop_567:
STR r8, [r0, #24]
STR r9, [r0, #28]
LDR r3, [sp, #4]
ADD r3, r3, #0x10
ADD r3, r3, #16
RSBS r10, r3, #0x100
ADD r0, r0, #0x20
ADD r0, r0, #32
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_thumb2_ntt_loop_567
#else
BNE.N L_mlkem_thumb2_ntt_loop_567
#endif
ADD sp, sp, #0x8
ADD sp, sp, #8
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 1270 */
.size mlkem_thumb2_ntt,.-mlkem_thumb2_ntt
@@ -1358,13 +1358,13 @@ L_mlkem_invntt_zetas_inv:
.type mlkem_thumb2_invntt, %function
mlkem_thumb2_invntt:
PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
SUB sp, sp, #0x8
SUB sp, sp, #8
ADR r1, L_mlkem_invntt_zetas_inv
#ifndef WOLFSSL_ARM_ARCH_7M
MOV r12, #0xd01
MOVT r12, #0xcff
#endif /* !WOLFSSL_ARM_ARCH_7M */
MOV r3, #0x0
MOV r3, #0
L_mlkem_invntt_loop_765:
ADD lr, r1, r3, LSR #1
STR r3, [sp, #4]
@@ -1953,20 +1953,20 @@ L_mlkem_invntt_loop_765:
STR r8, [r0, #24]
STR r9, [r0, #28]
LDR r3, [sp, #4]
ADD r3, r3, #0x10
ADD r3, r3, #16
RSBS r10, r3, #0x100
ADD r0, r0, #0x20
ADD r0, r0, #32
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_invntt_loop_765
#else
BNE.N L_mlkem_invntt_loop_765
#endif
SUB r0, r0, #0x200
MOV r3, #0x0
MOV r3, #0
L_mlkem_invntt_loop_4_j:
STR r3, [sp, #4]
ADD lr, r1, r3, LSR #4
MOV r2, #0x4
MOV r2, #4
LDR lr, [lr, #224]
L_mlkem_invntt_loop_4_i:
STR r2, [sp]
@@ -2143,8 +2143,8 @@ L_mlkem_invntt_loop_4_i:
STR r8, [r0, #96]
STR r9, [r0, #112]
LDRD r2, r3, [sp]
SUBS r2, r2, #0x1
ADD r0, r0, #0x4
SUBS r2, r2, #1
ADD r0, r0, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_invntt_loop_4_i
#else
@@ -2159,7 +2159,7 @@ L_mlkem_invntt_loop_4_i:
BNE.N L_mlkem_invntt_loop_4_j
#endif
SUB r0, r0, #0x200
MOV r2, #0x10
MOV r2, #16
L_mlkem_invntt_loop_321:
STR r2, [sp]
LDRH lr, [r1, #2]
@@ -2967,14 +2967,14 @@ L_mlkem_invntt_loop_321:
STR r8, [r0, #384]
STR r9, [r0, #448]
LDR r2, [sp]
SUBS r2, r2, #0x1
ADD r0, r0, #0x4
SUBS r2, r2, #1
ADD r0, r0, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_invntt_loop_321
#else
BNE.N L_mlkem_invntt_loop_321
#endif
ADD sp, sp, #0x8
ADD sp, sp, #8
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 1629 */
.size mlkem_thumb2_invntt,.-mlkem_thumb2_invntt
@@ -3020,12 +3020,12 @@ mlkem_thumb2_basemul_mont:
MOV r12, #0xd01
MOVT r12, #0xcff
#endif /* !WOLFSSL_ARM_ARCH_7M */
MOV r8, #0x0
MOV r8, #0
L_mlkem_basemul_mont_loop:
LDM r1!, {r4, r5}
LDM r2!, {r6, r7}
LDR lr, [r3, r8]
ADD r8, r8, #0x2
ADD r8, r8, #2
PUSH {r8}
CMP r8, #0x80
#ifndef WOLFSSL_ARM_ARCH_7M
@@ -3035,7 +3035,7 @@ L_mlkem_basemul_mont_loop:
SMULTB r11, r12, r10
SMLABB r8, r12, r9, r8
SMLABB r10, r12, r11, r10
RSB r11, lr, #0x0
RSB r11, lr, #0
SMULBT r8, lr, r8
SMULBT r10, r11, r10
SMLABB r8, r4, r6, r8
@@ -3071,7 +3071,7 @@ L_mlkem_basemul_mont_loop:
SBFX r11, r11, #0, #16
MLA r8, r12, r9, r8
MLA r10, r12, r11, r10
RSB r11, lr, #0x0
RSB r11, lr, #0
SBFX r9, lr, #0, #16
SBFX r11, r11, #0, #16
ASR r8, r8, #16
@@ -3143,12 +3143,12 @@ mlkem_thumb2_basemul_mont_add:
MOV r12, #0xd01
MOVT r12, #0xcff
#endif /* !WOLFSSL_ARM_ARCH_7M */
MOV r8, #0x0
MOV r8, #0
L_mlkem_thumb2_basemul_mont_add_loop:
LDM r1!, {r4, r5}
LDM r2!, {r6, r7}
LDR lr, [r3, r8]
ADD r8, r8, #0x2
ADD r8, r8, #2
PUSH {r8}
CMP r8, #0x80
#ifndef WOLFSSL_ARM_ARCH_7M
@@ -3158,7 +3158,7 @@ L_mlkem_thumb2_basemul_mont_add_loop:
SMULTB r11, r12, r10
SMLABB r8, r12, r9, r8
SMLABB r10, r12, r11, r10
RSB r11, lr, #0x0
RSB r11, lr, #0
SMULBT r8, lr, r8
SMULBT r10, r11, r10
SMLABB r8, r4, r6, r8
@@ -3197,7 +3197,7 @@ L_mlkem_thumb2_basemul_mont_add_loop:
SBFX r11, r11, #0, #16
MLA r8, r12, r9, r8
MLA r10, r12, r11, r10
RSB r11, lr, #0x0
RSB r11, lr, #0
SBFX r9, lr, #0, #16
SBFX r11, r11, #0, #16
ASR r8, r8, #16
@@ -3346,7 +3346,7 @@ L_mlkem_thumb2_csubq_loop:
BFI r5, r10, #0, #16
#endif /* !WOLFSSL_ARM_ARCH_7M */
STM r0!, {r2, r3, r4, r5}
SUBS r1, r1, #0x8
SUBS r1, r1, #8
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_thumb2_csubq_loop
#else
@@ -3362,9 +3362,9 @@ L_mlkem_thumb2_csubq_loop:
mlkem_thumb2_rej_uniform:
PUSH {r4, r5, r6, r7, r8, r9, r10, lr}
MOV r8, #0xd01
MOV r9, #0x0
MOV r9, #0
L_mlkem_thumb2_rej_uniform_loop_no_fail:
CMP r1, #0x8
CMP r1, #8
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BLT L_mlkem_thumb2_rej_uniform_done_no_fail
#else
@@ -3421,7 +3421,7 @@ L_mlkem_thumb2_rej_uniform_loop_no_fail:
LSR r10, r10, #31
SUB r1, r1, r10
ADD r9, r9, r10, LSL #1
SUBS r3, r3, #0xc
SUBS r3, r3, #12
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_thumb2_rej_uniform_loop_no_fail
#else
@@ -3433,7 +3433,7 @@ L_mlkem_thumb2_rej_uniform_loop_no_fail:
B.N L_mlkem_thumb2_rej_uniform_done
#endif
L_mlkem_thumb2_rej_uniform_done_no_fail:
CMP r1, #0x0
CMP r1, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3449,8 +3449,8 @@ L_mlkem_thumb2_rej_uniform_loop:
BGE.N L_mlkem_thumb2_rej_uniform_fail_0
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3465,8 +3465,8 @@ L_mlkem_thumb2_rej_uniform_fail_0:
BGE.N L_mlkem_thumb2_rej_uniform_fail_1
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3482,8 +3482,8 @@ L_mlkem_thumb2_rej_uniform_fail_1:
BGE.N L_mlkem_thumb2_rej_uniform_fail_2
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3498,8 +3498,8 @@ L_mlkem_thumb2_rej_uniform_fail_2:
BGE.N L_mlkem_thumb2_rej_uniform_fail_3
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3514,8 +3514,8 @@ L_mlkem_thumb2_rej_uniform_fail_3:
BGE.N L_mlkem_thumb2_rej_uniform_fail_4
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3531,8 +3531,8 @@ L_mlkem_thumb2_rej_uniform_fail_4:
BGE.N L_mlkem_thumb2_rej_uniform_fail_5
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3547,8 +3547,8 @@ L_mlkem_thumb2_rej_uniform_fail_5:
BGE.N L_mlkem_thumb2_rej_uniform_fail_6
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3563,15 +3563,15 @@ L_mlkem_thumb2_rej_uniform_fail_6:
BGE.N L_mlkem_thumb2_rej_uniform_fail_7
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
BEQ.N L_mlkem_thumb2_rej_uniform_done
#endif
L_mlkem_thumb2_rej_uniform_fail_7:
SUBS r3, r3, #0xc
SUBS r3, r3, #12
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BGT L_mlkem_thumb2_rej_uniform_loop
#else
+54 -54
View File
@@ -85,13 +85,13 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"SUB sp, sp, #0x8\n\t"
"SUB sp, sp, #8\n\t"
"MOV r1, %[L_mlkem_thumb2_ntt_zetas]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"MOV r12, #0xd01\n\t"
"MOVT r12, #0xcff\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
"MOV r2, #0x10\n\t"
"MOV r2, #16\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_ntt_loop_123:\n\t"
@@ -528,8 +528,8 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
"STR r8, [%[r], #384]\n\t"
"STR r9, [%[r], #448]\n\t"
"LDR r2, [sp]\n\t"
"SUBS r2, r2, #0x1\n\t"
"ADD %[r], %[r], #0x4\n\t"
"SUBS r2, r2, #1\n\t"
"ADD %[r], %[r], #4\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_ntt_loop_123_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -538,7 +538,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
"BNE.N L_mlkem_thumb2_ntt_loop_123_%=\n\t"
#endif
"SUB %[r], %[r], #0x40\n\t"
"MOV r3, #0x0\n\t"
"MOV r3, #0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_ntt_loop_4_j:\n\t"
@@ -547,7 +547,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
#endif
"STR r3, [sp, #4]\n\t"
"ADD lr, r1, r3, LSR #4\n\t"
"MOV r2, #0x4\n\t"
"MOV r2, #4\n\t"
"LDR lr, [lr, #16]\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -709,8 +709,8 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
"STR r8, [%[r], #96]\n\t"
"STR r9, [%[r], #112]\n\t"
"LDRD r2, r3, [sp]\n\t"
"SUBS r2, r2, #0x1\n\t"
"ADD %[r], %[r], #0x4\n\t"
"SUBS r2, r2, #1\n\t"
"ADD %[r], %[r], #4\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_ntt_loop_4_i_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -729,7 +729,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
"BNE.N L_mlkem_thumb2_ntt_loop_4_j_%=\n\t"
#endif
"SUB %[r], %[r], #0x200\n\t"
"MOV r3, #0x0\n\t"
"MOV r3, #0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_ntt_loop_567:\n\t"
@@ -1352,9 +1352,9 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
"STR r8, [%[r], #24]\n\t"
"STR r9, [%[r], #28]\n\t"
"LDR r3, [sp, #4]\n\t"
"ADD r3, r3, #0x10\n\t"
"ADD r3, r3, #16\n\t"
"RSBS r10, r3, #0x100\n\t"
"ADD %[r], %[r], #0x20\n\t"
"ADD %[r], %[r], #32\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_ntt_loop_567_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1362,7 +1362,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
#else
"BNE.N L_mlkem_thumb2_ntt_loop_567_%=\n\t"
#endif
"ADD sp, sp, #0x8\n\t"
"ADD sp, sp, #8\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r),
[L_mlkem_thumb2_ntt_zetas] "+r" (L_mlkem_thumb2_ntt_zetas_c)
@@ -1412,13 +1412,13 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"SUB sp, sp, #0x8\n\t"
"SUB sp, sp, #8\n\t"
"MOV r1, %[L_mlkem_invntt_zetas_inv]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"MOV r12, #0xd01\n\t"
"MOVT r12, #0xcff\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
"MOV r3, #0x0\n\t"
"MOV r3, #0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_invntt_loop_765:\n\t"
@@ -2012,9 +2012,9 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
"STR r8, [%[r], #24]\n\t"
"STR r9, [%[r], #28]\n\t"
"LDR r3, [sp, #4]\n\t"
"ADD r3, r3, #0x10\n\t"
"ADD r3, r3, #16\n\t"
"RSBS r10, r3, #0x100\n\t"
"ADD %[r], %[r], #0x20\n\t"
"ADD %[r], %[r], #32\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_invntt_loop_765_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2023,7 +2023,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
"BNE.N L_mlkem_invntt_loop_765_%=\n\t"
#endif
"SUB %[r], %[r], #0x200\n\t"
"MOV r3, #0x0\n\t"
"MOV r3, #0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_invntt_loop_4_j:\n\t"
@@ -2032,7 +2032,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
#endif
"STR r3, [sp, #4]\n\t"
"ADD lr, r1, r3, LSR #4\n\t"
"MOV r2, #0x4\n\t"
"MOV r2, #4\n\t"
"LDR lr, [lr, #224]\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2214,8 +2214,8 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
"STR r8, [%[r], #96]\n\t"
"STR r9, [%[r], #112]\n\t"
"LDRD r2, r3, [sp]\n\t"
"SUBS r2, r2, #0x1\n\t"
"ADD %[r], %[r], #0x4\n\t"
"SUBS r2, r2, #1\n\t"
"ADD %[r], %[r], #4\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_invntt_loop_4_i_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2234,7 +2234,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
"BNE.N L_mlkem_invntt_loop_4_j_%=\n\t"
#endif
"SUB %[r], %[r], #0x200\n\t"
"MOV r2, #0x10\n\t"
"MOV r2, #16\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_invntt_loop_321:\n\t"
@@ -3047,8 +3047,8 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
"STR r8, [%[r], #384]\n\t"
"STR r9, [%[r], #448]\n\t"
"LDR r2, [sp]\n\t"
"SUBS r2, r2, #0x1\n\t"
"ADD %[r], %[r], #0x4\n\t"
"SUBS r2, r2, #1\n\t"
"ADD %[r], %[r], #4\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_invntt_loop_321_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3056,7 +3056,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
#else
"BNE.N L_mlkem_invntt_loop_321_%=\n\t"
#endif
"ADD sp, sp, #0x8\n\t"
"ADD sp, sp, #8\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r),
[L_mlkem_invntt_zetas_inv] "+r" (L_mlkem_invntt_zetas_inv_c)
@@ -3116,7 +3116,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r,
"MOV r12, #0xd01\n\t"
"MOVT r12, #0xcff\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
"MOV r8, #0x0\n\t"
"MOV r8, #0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_basemul_mont_loop:\n\t"
@@ -3126,7 +3126,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r,
"LDM %[a]!, {r4, r5}\n\t"
"LDM %[b]!, {r6, r7}\n\t"
"LDR lr, [r3, r8]\n\t"
"ADD r8, r8, #0x2\n\t"
"ADD r8, r8, #2\n\t"
"PUSH {r8}\n\t"
"CMP r8, #0x80\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
@@ -3136,7 +3136,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r,
"SMULTB r11, r12, r10\n\t"
"SMLABB r8, r12, r9, r8\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"RSB r11, lr, #0x0\n\t"
"RSB r11, lr, #0\n\t"
"SMULBT r8, lr, r8\n\t"
"SMULBT r10, r11, r10\n\t"
"SMLABB r8, r4, r6, r8\n\t"
@@ -3172,7 +3172,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r,
"SBFX r11, r11, #0, #16\n\t"
"MLA r8, r12, r9, r8\n\t"
"MLA r10, r12, r11, r10\n\t"
"RSB r11, lr, #0x0\n\t"
"RSB r11, lr, #0\n\t"
"SBFX r9, lr, #0, #16\n\t"
"SBFX r11, r11, #0, #16\n\t"
"ASR r8, r8, #16\n\t"
@@ -3271,7 +3271,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r,
"MOV r12, #0xd01\n\t"
"MOVT r12, #0xcff\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
"MOV r8, #0x0\n\t"
"MOV r8, #0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_basemul_mont_add_loop:\n\t"
@@ -3281,7 +3281,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r,
"LDM %[a]!, {r4, r5}\n\t"
"LDM %[b]!, {r6, r7}\n\t"
"LDR lr, [r3, r8]\n\t"
"ADD r8, r8, #0x2\n\t"
"ADD r8, r8, #2\n\t"
"PUSH {r8}\n\t"
"CMP r8, #0x80\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
@@ -3291,7 +3291,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r,
"SMULTB r11, r12, r10\n\t"
"SMLABB r8, r12, r9, r8\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"RSB r11, lr, #0x0\n\t"
"RSB r11, lr, #0\n\t"
"SMULBT r8, lr, r8\n\t"
"SMULBT r10, r11, r10\n\t"
"SMLABB r8, r4, r6, r8\n\t"
@@ -3330,7 +3330,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r,
"SBFX r11, r11, #0, #16\n\t"
"MLA r8, r12, r9, r8\n\t"
"MLA r10, r12, r11, r10\n\t"
"RSB r11, lr, #0x0\n\t"
"RSB r11, lr, #0\n\t"
"SBFX r9, lr, #0, #16\n\t"
"SBFX r11, r11, #0, #16\n\t"
"ASR r8, r8, #16\n\t"
@@ -3507,7 +3507,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_csubq(sword16* p)
"BFI r5, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
"STM %[p]!, {r2, r3, r4, r5}\n\t"
"SUBS r1, r1, #0x8\n\t"
"SUBS r1, r1, #8\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_csubq_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3551,14 +3551,14 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
__asm__ __volatile__ (
"MOV r8, #0xd01\n\t"
"MOV r9, #0x0\n\t"
"MOV r9, #0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_rej_uniform_loop_no_fail:\n\t"
#else
"L_mlkem_thumb2_rej_uniform_loop_no_fail_%=:\n\t"
#endif
"CMP %[len], #0x8\n\t"
"CMP %[len], #8\n\t"
#if defined(__GNUC__)
"BLT L_mlkem_thumb2_rej_uniform_done_no_fail_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3617,7 +3617,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"LSR r10, r10, #31\n\t"
"SUB %[len], %[len], r10\n\t"
"ADD r9, r9, r10, LSL #1\n\t"
"SUBS %[rLen], %[rLen], #0xc\n\t"
"SUBS %[rLen], %[rLen], #12\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_rej_uniform_loop_no_fail_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3638,7 +3638,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
#else
"L_mlkem_thumb2_rej_uniform_done_no_fail_%=:\n\t"
#endif
"CMP %[len], #0x0\n\t"
"CMP %[len], #0\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3663,8 +3663,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_0_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3688,8 +3688,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_1_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3714,8 +3714,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_2_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3739,8 +3739,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_3_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3764,8 +3764,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_4_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3790,8 +3790,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_5_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3815,8 +3815,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_6_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3840,8 +3840,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_7_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3855,7 +3855,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
#else
"L_mlkem_thumb2_rej_uniform_fail_7_%=:\n\t"
#endif
"SUBS %[rLen], %[rLen], #0xc\n\t"
"SUBS %[rLen], %[rLen], #12\n\t"
#if defined(__GNUC__)
"BGT L_mlkem_thumb2_rej_uniform_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
+29 -29
View File
@@ -39,17 +39,17 @@
.type poly1305_blocks_thumb2_16, %function
poly1305_blocks_thumb2_16:
PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
SUB sp, sp, #0x1c
CMP r2, #0x0
SUB sp, sp, #28
CMP r2, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_poly1305_thumb2_16_done
#else
BEQ.N L_poly1305_thumb2_16_done
#endif
ADD lr, sp, #0xc
ADD lr, sp, #12
STM lr, {r0, r1, r2, r3}
/* Get h pointer */
ADD lr, r0, #0x10
ADD lr, r0, #16
LDM lr, {r4, r5, r6, r7, r8}
L_poly1305_thumb2_16_loop:
/* Add m to h */
@@ -63,7 +63,7 @@ L_poly1305_thumb2_16_loop:
ADCS r5, r5, r3
ADCS r6, r6, r9
ADCS r7, r7, r10
ADD r1, r1, #0x10
ADD r1, r1, #16
ADC r8, r8, r11
#ifdef WOLFSSL_ARM_ARCH_7M
STM lr, {r4, r5, r6, r7, r8}
@@ -205,7 +205,7 @@ L_poly1305_thumb2_16_loop:
LDR r5, [lr, #16]
/* r[3] * h[3] */
UMAAL r10, r11, r3, r4
MOV r12, #0x0
MOV r12, #0
/* r[0] * h[4] */
UMAAL r8, r12, r0, r5
/* r[1] * h[4] */
@@ -221,8 +221,8 @@ L_poly1305_thumb2_16_loop:
/* Load length */
LDR r2, [sp, #20]
/* Reduce mod 2^130 - 5 */
BIC r3, r8, #0x3
AND r8, r8, #0x3
BIC r3, r8, #3
AND r8, r8, #3
ADDS r4, r4, r3
LSR r3, r3, #2
ADCS r5, r5, r9
@@ -241,7 +241,7 @@ L_poly1305_thumb2_16_loop:
ADCS r7, r7, r11
ADC r8, r8, r12
/* Sub 16 from length. */
SUBS r2, r2, #0x10
SUBS r2, r2, #16
/* Store length. */
STR r2, [sp, #20]
/* Loop again if more message to do. */
@@ -252,7 +252,7 @@ L_poly1305_thumb2_16_loop:
#endif
STM lr, {r4, r5, r6, r7, r8}
L_poly1305_thumb2_16_done:
ADD sp, sp, #0x1c
ADD sp, sp, #28
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 250 */
.size poly1305_blocks_thumb2_16,.-poly1305_blocks_thumb2_16
@@ -285,7 +285,7 @@ poly1305_set_key:
LDR r3, [r1, #20]
LDR r4, [r1, #24]
LDR r5, [r1, #28]
ADD r10, r0, #0x24
ADD r10, r0, #36
STM r10, {r2, r3, r4, r5}
/* Load, mask and store r. */
LDR r2, [r1]
@@ -296,14 +296,14 @@ poly1305_set_key:
AND r3, r3, r7
AND r4, r4, r8
AND r5, r5, r9
ADD r10, r0, #0x0
ADD r10, r0, #0
STM r10, {r2, r3, r4, r5}
/* h (accumulator) = 0 */
EOR r6, r6, r6
EOR r7, r7, r7
EOR r8, r8, r8
EOR r9, r9, r9
ADD r10, r0, #0x10
ADD r10, r0, #16
EOR r5, r5, r5
STM r10, {r5, r6, r7, r8, r9}
/* Zero leftover */
@@ -317,25 +317,25 @@ poly1305_set_key:
.type poly1305_final, %function
poly1305_final:
PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
ADD r11, r0, #0x10
ADD r11, r0, #16
LDM r11, {r2, r3, r4, r5, r6}
/* Add 5 and check for h larger than p. */
ADDS r7, r2, #0x5
ADCS r7, r3, #0x0
ADCS r7, r4, #0x0
ADCS r7, r5, #0x0
ADC r7, r6, #0x0
SUB r7, r7, #0x4
ADDS r7, r2, #5
ADCS r7, r3, #0
ADCS r7, r4, #0
ADCS r7, r5, #0
ADC r7, r6, #0
SUB r7, r7, #4
LSR r7, r7, #31
SUB r7, r7, #0x1
AND r7, r7, #0x5
SUB r7, r7, #1
AND r7, r7, #5
/* Add 0/5 to h. */
ADDS r2, r2, r7
ADCS r3, r3, #0x0
ADCS r4, r4, #0x0
ADC r5, r5, #0x0
ADCS r3, r3, #0
ADCS r4, r4, #0
ADC r5, r5, #0
/* Add padding */
ADD r11, r0, #0x24
ADD r11, r0, #36
LDM r11, {r7, r8, r9, r10}
ADDS r2, r2, r7
ADCS r3, r3, r8
@@ -352,13 +352,13 @@ poly1305_final:
EOR r4, r4, r4
EOR r5, r5, r5
EOR r6, r6, r6
ADD r11, r0, #0x10
ADD r11, r0, #16
STM r11, {r2, r3, r4, r5, r6}
/* Zero out r. */
ADD r11, r0, #0x0
ADD r11, r0, #0
STM r11, {r2, r3, r4, r5}
/* Zero out padding. */
ADD r11, r0, #0x24
ADD r11, r0, #36
STM r11, {r2, r3, r4, r5}
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 82 */
+29 -29
View File
@@ -66,8 +66,8 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"SUB sp, sp, #0x1c\n\t"
"CMP %[len], #0x0\n\t"
"SUB sp, sp, #28\n\t"
"CMP %[len], #0\n\t"
#if defined(__GNUC__)
"BEQ L_poly1305_thumb2_16_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -75,10 +75,10 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
#else
"BEQ.N L_poly1305_thumb2_16_done_%=\n\t"
#endif
"ADD lr, sp, #0xc\n\t"
"ADD lr, sp, #12\n\t"
"STM lr, {%[ctx], %[m], %[len], %[notLast]}\n\t"
/* Get h pointer */
"ADD lr, %[ctx], #0x10\n\t"
"ADD lr, %[ctx], #16\n\t"
"LDM lr, {r4, r5, r6, r7, r8}\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -97,7 +97,7 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
"ADCS r5, r5, %[notLast]\n\t"
"ADCS r6, r6, r9\n\t"
"ADCS r7, r7, r10\n\t"
"ADD %[m], %[m], #0x10\n\t"
"ADD %[m], %[m], #16\n\t"
"ADC r8, r8, r11\n\t"
#ifdef WOLFSSL_ARM_ARCH_7M
"STM lr, {r4, r5, r6, r7, r8}\n\t"
@@ -239,7 +239,7 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
"LDR r5, [lr, #16]\n\t"
/* r[3] * h[3] */
"UMAAL r10, r11, %[notLast], r4\n\t"
"MOV r12, #0x0\n\t"
"MOV r12, #0\n\t"
/* r[0] * h[4] */
"UMAAL r8, r12, %[ctx], r5\n\t"
/* r[1] * h[4] */
@@ -255,8 +255,8 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
/* Load length */
"LDR %[len], [sp, #20]\n\t"
/* Reduce mod 2^130 - 5 */
"BIC %[notLast], r8, #0x3\n\t"
"AND r8, r8, #0x3\n\t"
"BIC %[notLast], r8, #3\n\t"
"AND r8, r8, #3\n\t"
"ADDS r4, r4, %[notLast]\n\t"
"LSR %[notLast], %[notLast], #2\n\t"
"ADCS r5, r5, r9\n\t"
@@ -275,7 +275,7 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
"ADCS r7, r7, r11\n\t"
"ADC r8, r8, r12\n\t"
/* Sub 16 from length. */
"SUBS %[len], %[len], #0x10\n\t"
"SUBS %[len], %[len], #16\n\t"
/* Store length. */
"STR %[len], [sp, #20]\n\t"
/* Loop again if more message to do. */
@@ -293,7 +293,7 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
#else
"L_poly1305_thumb2_16_done_%=:\n\t"
#endif
"ADD sp, sp, #0x1c\n\t"
"ADD sp, sp, #28\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len),
[notLast] "+r" (notLast)
@@ -337,7 +337,7 @@ WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx, const byte* key)
"LDR r3, [%[key], #20]\n\t"
"LDR r4, [%[key], #24]\n\t"
"LDR r5, [%[key], #28]\n\t"
"ADD r10, %[ctx], #0x24\n\t"
"ADD r10, %[ctx], #36\n\t"
"STM r10, {r2, r3, r4, r5}\n\t"
/* Load, mask and store r. */
"LDR r2, [%[key]]\n\t"
@@ -348,14 +348,14 @@ WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx, const byte* key)
"AND r3, r3, r7\n\t"
"AND r4, r4, r8\n\t"
"AND r5, r5, r9\n\t"
"ADD r10, %[ctx], #0x0\n\t"
"ADD r10, %[ctx], #0\n\t"
"STM r10, {r2, r3, r4, r5}\n\t"
/* h (accumulator) = 0 */
"EOR r6, r6, r6\n\t"
"EOR r7, r7, r7\n\t"
"EOR r8, r8, r8\n\t"
"EOR r9, r9, r9\n\t"
"ADD r10, %[ctx], #0x10\n\t"
"ADD r10, %[ctx], #16\n\t"
"EOR r5, r5, r5\n\t"
"STM r10, {r5, r6, r7, r8, r9}\n\t"
/* Zero leftover */
@@ -385,25 +385,25 @@ WC_OMIT_FRAME_POINTER void poly1305_final(Poly1305* ctx, byte* mac)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"ADD r11, %[ctx], #0x10\n\t"
"ADD r11, %[ctx], #16\n\t"
"LDM r11, {r2, r3, r4, r5, r6}\n\t"
/* Add 5 and check for h larger than p. */
"ADDS r7, r2, #0x5\n\t"
"ADCS r7, r3, #0x0\n\t"
"ADCS r7, r4, #0x0\n\t"
"ADCS r7, r5, #0x0\n\t"
"ADC r7, r6, #0x0\n\t"
"SUB r7, r7, #0x4\n\t"
"ADDS r7, r2, #5\n\t"
"ADCS r7, r3, #0\n\t"
"ADCS r7, r4, #0\n\t"
"ADCS r7, r5, #0\n\t"
"ADC r7, r6, #0\n\t"
"SUB r7, r7, #4\n\t"
"LSR r7, r7, #31\n\t"
"SUB r7, r7, #0x1\n\t"
"AND r7, r7, #0x5\n\t"
"SUB r7, r7, #1\n\t"
"AND r7, r7, #5\n\t"
/* Add 0/5 to h. */
"ADDS r2, r2, r7\n\t"
"ADCS r3, r3, #0x0\n\t"
"ADCS r4, r4, #0x0\n\t"
"ADC r5, r5, #0x0\n\t"
"ADCS r3, r3, #0\n\t"
"ADCS r4, r4, #0\n\t"
"ADC r5, r5, #0\n\t"
/* Add padding */
"ADD r11, %[ctx], #0x24\n\t"
"ADD r11, %[ctx], #36\n\t"
"LDM r11, {r7, r8, r9, r10}\n\t"
"ADDS r2, r2, r7\n\t"
"ADCS r3, r3, r8\n\t"
@@ -420,13 +420,13 @@ WC_OMIT_FRAME_POINTER void poly1305_final(Poly1305* ctx, byte* mac)
"EOR r4, r4, r4\n\t"
"EOR r5, r5, r5\n\t"
"EOR r6, r6, r6\n\t"
"ADD r11, %[ctx], #0x10\n\t"
"ADD r11, %[ctx], #16\n\t"
"STM r11, {r2, r3, r4, r5, r6}\n\t"
/* Zero out r. */
"ADD r11, %[ctx], #0x0\n\t"
"ADD r11, %[ctx], #0\n\t"
"STM r11, {r2, r3, r4, r5}\n\t"
/* Zero out padding. */
"ADD r11, %[ctx], #0x24\n\t"
"ADD r11, %[ctx], #36\n\t"
"STM r11, {r2, r3, r4, r5}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ctx] "+r" (ctx), [mac] "+r" (mac)
+21 -21
View File
@@ -128,7 +128,7 @@ L_SHA256_transform_len_begin:
LDR r4, [r0, #8]
EOR r11, r11, r4
#ifndef WOLFSSL_ARMASM_SHA256_SMALL
MOV r3, #0x3
MOV r3, #3
/* Start of 16 rounds */
L_SHA256_transform_len_start_fast:
/* Round 0 */
@@ -884,7 +884,7 @@ L_SHA256_transform_len_start_fast:
ADD r9, r9, r4
STR r9, [sp, #60]
ADD r12, r12, #0x40
SUBS r3, r3, #0x1
SUBS r3, r3, #1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_SHA256_transform_len_start_fast
#else
@@ -1403,10 +1403,10 @@ L_SHA256_transform_len_start_fast:
STR r8, [r0, #16]
STR r9, [r0]
#else
MOV r3, #0x4
MOV r3, #4
/* Start of 16 rounds */
L_SHA256_transform_len_start_small:
SUB r3, r3, #0x1
SUB r3, r3, #1
/* Round 0 */
LDR r5, [r0, #16]
LDR r6, [r0, #20]
@@ -1439,7 +1439,7 @@ L_SHA256_transform_len_start_small:
ADD r9, r9, r11
STR r8, [r0, #12]
STR r9, [r0, #28]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_0
#else
@@ -1493,7 +1493,7 @@ L_SHA256_transform_len_blk_end_0:
ADD r9, r9, r10
STR r8, [r0, #8]
STR r9, [r0, #24]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_1
#else
@@ -1547,7 +1547,7 @@ L_SHA256_transform_len_blk_end_1:
ADD r9, r9, r11
STR r8, [r0, #4]
STR r9, [r0, #20]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_2
#else
@@ -1601,7 +1601,7 @@ L_SHA256_transform_len_blk_end_2:
ADD r9, r9, r10
STR r8, [r0]
STR r9, [r0, #16]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_3
#else
@@ -1655,7 +1655,7 @@ L_SHA256_transform_len_blk_end_3:
ADD r9, r9, r11
STR r8, [r0, #28]
STR r9, [r0, #12]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_4
#else
@@ -1709,7 +1709,7 @@ L_SHA256_transform_len_blk_end_4:
ADD r9, r9, r10
STR r8, [r0, #24]
STR r9, [r0, #8]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_5
#else
@@ -1763,7 +1763,7 @@ L_SHA256_transform_len_blk_end_5:
ADD r9, r9, r11
STR r8, [r0, #20]
STR r9, [r0, #4]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_6
#else
@@ -1817,7 +1817,7 @@ L_SHA256_transform_len_blk_end_6:
ADD r9, r9, r10
STR r8, [r0, #16]
STR r9, [r0]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_7
#else
@@ -1871,7 +1871,7 @@ L_SHA256_transform_len_blk_end_7:
ADD r9, r9, r11
STR r8, [r0, #12]
STR r9, [r0, #28]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_8
#else
@@ -1925,7 +1925,7 @@ L_SHA256_transform_len_blk_end_8:
ADD r9, r9, r10
STR r8, [r0, #8]
STR r9, [r0, #24]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_9
#else
@@ -1979,7 +1979,7 @@ L_SHA256_transform_len_blk_end_9:
ADD r9, r9, r11
STR r8, [r0, #4]
STR r9, [r0, #20]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_10
#else
@@ -2033,7 +2033,7 @@ L_SHA256_transform_len_blk_end_10:
ADD r9, r9, r10
STR r8, [r0]
STR r9, [r0, #16]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_11
#else
@@ -2087,7 +2087,7 @@ L_SHA256_transform_len_blk_end_11:
ADD r9, r9, r11
STR r8, [r0, #28]
STR r9, [r0, #12]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_12
#else
@@ -2141,7 +2141,7 @@ L_SHA256_transform_len_blk_end_12:
ADD r9, r9, r10
STR r8, [r0, #24]
STR r9, [r0, #8]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_13
#else
@@ -2195,7 +2195,7 @@ L_SHA256_transform_len_blk_end_13:
ADD r9, r9, r11
STR r8, [r0, #20]
STR r9, [r0, #4]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_14
#else
@@ -2249,7 +2249,7 @@ L_SHA256_transform_len_blk_end_14:
ADD r9, r9, r10
STR r8, [r0, #16]
STR r9, [r0]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_15
#else
@@ -2271,7 +2271,7 @@ L_SHA256_transform_len_blk_end_14:
ADD r9, r9, r4
STR r9, [sp, #60]
L_SHA256_transform_len_blk_end_15:
CMP r3, #0x0
CMP r3, #0
ADD r12, r12, #0x40
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_SHA256_transform_len_start_small
+21 -21
View File
@@ -155,7 +155,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"LDR r4, [%[sha256], #8]\n\t"
"EOR r11, r11, r4\n\t"
#ifndef WOLFSSL_ARMASM_SHA256_SMALL
"MOV r3, #0x3\n\t"
"MOV r3, #3\n\t"
/* Start of 16 rounds */
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -916,7 +916,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r4\n\t"
"STR r9, [sp, #60]\n\t"
"ADD r12, r12, #0x40\n\t"
"SUBS r3, r3, #0x1\n\t"
"SUBS r3, r3, #1\n\t"
#if defined(__GNUC__)
"BNE L_SHA256_transform_len_start_fast_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1437,7 +1437,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"STR r8, [%[sha256], #16]\n\t"
"STR r9, [%[sha256]]\n\t"
#else
"MOV r3, #0x4\n\t"
"MOV r3, #4\n\t"
/* Start of 16 rounds */
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1445,7 +1445,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
#else
"L_SHA256_transform_len_start_small_%=:\n\t"
#endif
"SUB r3, r3, #0x1\n\t"
"SUB r3, r3, #1\n\t"
/* Round 0 */
"LDR r5, [%[sha256], #16]\n\t"
"LDR r6, [%[sha256], #20]\n\t"
@@ -1478,7 +1478,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #12]\n\t"
"STR r9, [%[sha256], #28]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_0_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1539,7 +1539,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256], #8]\n\t"
"STR r9, [%[sha256], #24]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_1_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1600,7 +1600,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #4]\n\t"
"STR r9, [%[sha256], #20]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_2_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1661,7 +1661,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256]]\n\t"
"STR r9, [%[sha256], #16]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_3_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1722,7 +1722,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #28]\n\t"
"STR r9, [%[sha256], #12]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_4_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1783,7 +1783,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256], #24]\n\t"
"STR r9, [%[sha256], #8]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_5_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1844,7 +1844,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #20]\n\t"
"STR r9, [%[sha256], #4]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_6_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1905,7 +1905,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256], #16]\n\t"
"STR r9, [%[sha256]]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_7_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1966,7 +1966,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #12]\n\t"
"STR r9, [%[sha256], #28]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_8_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2027,7 +2027,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256], #8]\n\t"
"STR r9, [%[sha256], #24]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_9_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2088,7 +2088,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #4]\n\t"
"STR r9, [%[sha256], #20]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_10_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2149,7 +2149,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256]]\n\t"
"STR r9, [%[sha256], #16]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_11_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2210,7 +2210,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #28]\n\t"
"STR r9, [%[sha256], #12]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_12_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2271,7 +2271,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256], #24]\n\t"
"STR r9, [%[sha256], #8]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_13_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2332,7 +2332,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #20]\n\t"
"STR r9, [%[sha256], #4]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_14_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2393,7 +2393,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256], #16]\n\t"
"STR r9, [%[sha256]]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_15_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2422,7 +2422,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
#else
"L_SHA256_transform_len_blk_end_15_%=:\n\t"
#endif
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
"ADD r12, r12, #0x40\n\t"
#if defined(__GNUC__)
"BNE L_SHA256_transform_len_start_small_%=\n\t"
+4 -4
View File
@@ -67,7 +67,7 @@ BlockSha3:
PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
SUB sp, sp, #0xcc
ADR r1, L_sha3_thumb2_rt
MOV r2, #0xc
MOV r2, #12
L_sha3_thumb2_begin:
STR r2, [sp, #200]
/* Round even */
@@ -336,7 +336,7 @@ L_sha3_thumb2_begin:
STR lr, [sp, #36]
/* Get constant */
LDRD r10, r11, [r1]
ADD r1, r1, #0x8
ADD r1, r1, #8
BIC r12, r6, r4
BIC lr, r7, r5
EOR r12, r12, r2
@@ -864,7 +864,7 @@ L_sha3_thumb2_begin:
STR lr, [r0, #36]
/* Get constant */
LDRD r10, r11, [r1]
ADD r1, r1, #0x8
ADD r1, r1, #8
BIC r12, r6, r4
BIC lr, r7, r5
EOR r12, r12, r2
@@ -1127,7 +1127,7 @@ L_sha3_thumb2_begin:
STR r12, [r0, #160]
STR lr, [r0, #164]
LDR r2, [sp, #200]
SUBS r2, r2, #0x1
SUBS r2, r2, #1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_sha3_thumb2_begin
#else
+4 -4
View File
@@ -82,7 +82,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state)
__asm__ __volatile__ (
"SUB sp, sp, #0xcc\n\t"
"MOV r1, %[L_sha3_thumb2_rt]\n\t"
"MOV r2, #0xc\n\t"
"MOV r2, #12\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_sha3_thumb2_begin:\n\t"
@@ -356,7 +356,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state)
"STR lr, [sp, #36]\n\t"
/* Get constant */
"LDRD r10, r11, [r1]\n\t"
"ADD r1, r1, #0x8\n\t"
"ADD r1, r1, #8\n\t"
"BIC r12, r6, r4\n\t"
"BIC lr, r7, r5\n\t"
"EOR r12, r12, r2\n\t"
@@ -884,7 +884,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state)
"STR lr, [%[state], #36]\n\t"
/* Get constant */
"LDRD r10, r11, [r1]\n\t"
"ADD r1, r1, #0x8\n\t"
"ADD r1, r1, #8\n\t"
"BIC r12, r6, r4\n\t"
"BIC lr, r7, r5\n\t"
"EOR r12, r12, r2\n\t"
@@ -1147,7 +1147,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state)
"STR r12, [%[state], #160]\n\t"
"STR lr, [%[state], #164]\n\t"
"LDR r2, [sp, #200]\n\t"
"SUBS r2, r2, #0x1\n\t"
"SUBS r2, r2, #1\n\t"
#if defined(__GNUC__)
"BNE L_sha3_thumb2_begin_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
+2 -2
View File
@@ -217,7 +217,7 @@ L_SHA512_transform_len_begin:
LDRD r4, r5, [r0, #16]
EOR r10, r10, r4
EOR r11, r11, r5
MOV r12, #0x4
MOV r12, #4
/* Start of 16 rounds */
L_SHA512_transform_len_start:
/* Round 0 */
@@ -2205,7 +2205,7 @@ L_SHA512_transform_len_start:
ADC r5, r5, r7
STRD r4, r5, [sp, #120]
ADD r3, r3, #0x80
SUBS r12, r12, #0x1
SUBS r12, r12, #1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_SHA512_transform_len_start
#else
+2 -2
View File
@@ -244,7 +244,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_base(wc_Sha512* sha512,
"LDRD r4, r5, [%[sha512], #16]\n\t"
"EOR r10, r10, r4\n\t"
"EOR r11, r11, r5\n\t"
"MOV r12, #0x4\n\t"
"MOV r12, #4\n\t"
/* Start of 16 rounds */
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2237,7 +2237,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_base(wc_Sha512* sha512,
"ADC r5, r5, r7\n\t"
"STRD r4, r5, [sp, #120]\n\t"
"ADD r3, r3, #0x80\n\t"
"SUBS r12, r12, #0x1\n\t"
"SUBS r12, r12, #1\n\t"
#if defined(__GNUC__)
"BNE L_SHA512_transform_len_start_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)