Merge pull request #10725 from SparkiDev/aes_x25519_arm32_thumb2_fixes

ARM32/Thumb2: generated asm fixes
This commit is contained in:
Daniel Pouzzner
2026-06-30 10:47:03 -05:00
committed by GitHub
24 changed files with 4971 additions and 4766 deletions
+170 -59
View File
@@ -8367,37 +8367,36 @@ L_AES_set_encrypt_key_loop_256:
add r2, r2, #16
stm r2, {r4, r5, r6, r7}
sub r2, r2, #16
mov r3, r7
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
lsl r4, r3, #16
lsl r4, r7, #16
lsr r4, r4, #24
#else
uxtb r4, r3, ror #8
uxtb r4, r7, ror #8
#endif
#else
ubfx r4, r3, #8, #8
ubfx r4, r7, #8, #8
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
lsl r5, r3, #8
lsl r5, r7, #8
lsr r5, r5, #24
#else
uxtb r5, r3, ror #16
uxtb r5, r7, ror #16
#endif
#else
ubfx r5, r3, #16, #8
ubfx r5, r7, #16, #8
#endif
lsr r6, r3, #24
lsr r6, r7, #24
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
lsl r3, r3, #24
lsl r3, r7, #24
lsr r3, r3, #24
#else
uxtb r3, r3
uxtb r3, r7
#endif
#else
ubfx r3, r3, #0, #8
ubfx r3, r7, #0, #8
#endif
ldrb r4, [r8, r4, lsl #2]
ldrb r6, [r8, r6, lsl #2]
@@ -23252,14 +23251,22 @@ L_GCM_gmult_len_start_block:
ldr r12, [r0, #12]
ldr r3, [r2, #12]
eor r12, r12, r3
lsr r3, r12, #24
and r3, r3, #15
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r3, r12, #4
lsr r3, r3, #28
#else
ubfx r3, r12, #24, #4
#endif
add r3, r1, r3, lsl #4
ldm r3, {r8, r9, r10, r11}
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsr r4, r12, #28
#else
ubfx r4, r12, #28, #4
#endif
eor r11, r11, r10, lsl #28
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
@@ -23275,9 +23282,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #16
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #12
lsr r4, r4, #28
#else
ubfx r4, r12, #16, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23292,9 +23303,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #20
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #8
lsr r4, r4, #28
#else
ubfx r4, r12, #20, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23309,9 +23324,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #20
lsr r4, r4, #28
#else
ubfx r4, r12, #8, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23326,9 +23345,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #12
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #16
lsr r4, r4, #28
#else
ubfx r4, r12, #12, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23359,9 +23382,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #24
lsr r4, r4, #28
#else
ubfx r4, r12, #4, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23385,8 +23412,12 @@ L_GCM_gmult_len_start_block:
ldr r12, [r0, #8]
ldr r3, [r2, #8]
eor r12, r12, r3
lsr r3, r12, #24
and r3, r3, #15
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r3, r12, #4
lsr r3, r3, #28
#else
ubfx r3, r12, #24, #4
#endif
add r3, r1, r3, lsl #4
ldm r3, {r4, r5, r6, r7}
eor r8, r8, r4
@@ -23396,7 +23427,11 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsr r4, r12, #28
#else
ubfx r4, r12, #28, #4
#endif
eor r11, r11, r10, lsl #28
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
@@ -23412,9 +23447,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #16
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #12
lsr r4, r4, #28
#else
ubfx r4, r12, #16, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23429,9 +23468,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #20
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #8
lsr r4, r4, #28
#else
ubfx r4, r12, #20, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23446,9 +23489,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #20
lsr r4, r4, #28
#else
ubfx r4, r12, #8, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23463,9 +23510,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #12
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #16
lsr r4, r4, #28
#else
ubfx r4, r12, #12, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23496,9 +23547,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #24
lsr r4, r4, #28
#else
ubfx r4, r12, #4, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23522,8 +23577,12 @@ L_GCM_gmult_len_start_block:
ldr r12, [r0, #4]
ldr r3, [r2, #4]
eor r12, r12, r3
lsr r3, r12, #24
and r3, r3, #15
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r3, r12, #4
lsr r3, r3, #28
#else
ubfx r3, r12, #24, #4
#endif
add r3, r1, r3, lsl #4
ldm r3, {r4, r5, r6, r7}
eor r8, r8, r4
@@ -23533,7 +23592,11 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsr r4, r12, #28
#else
ubfx r4, r12, #28, #4
#endif
eor r11, r11, r10, lsl #28
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
@@ -23549,9 +23612,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #16
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #12
lsr r4, r4, #28
#else
ubfx r4, r12, #16, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23566,9 +23633,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #20
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #8
lsr r4, r4, #28
#else
ubfx r4, r12, #20, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23583,9 +23654,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #20
lsr r4, r4, #28
#else
ubfx r4, r12, #8, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23600,9 +23675,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #12
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #16
lsr r4, r4, #28
#else
ubfx r4, r12, #12, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23633,9 +23712,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #24
lsr r4, r4, #28
#else
ubfx r4, r12, #4, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23659,8 +23742,12 @@ L_GCM_gmult_len_start_block:
ldr r12, [r0]
ldr r3, [r2]
eor r12, r12, r3
lsr r3, r12, #24
and r3, r3, #15
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r3, r12, #4
lsr r3, r3, #28
#else
ubfx r3, r12, #24, #4
#endif
add r3, r1, r3, lsl #4
ldm r3, {r4, r5, r6, r7}
eor r8, r8, r4
@@ -23670,7 +23757,11 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsr r4, r12, #28
#else
ubfx r4, r12, #28, #4
#endif
eor r11, r11, r10, lsl #28
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
@@ -23686,9 +23777,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #16
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #12
lsr r4, r4, #28
#else
ubfx r4, r12, #16, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23703,9 +23798,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #20
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #8
lsr r4, r4, #28
#else
ubfx r4, r12, #20, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23720,9 +23819,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #20
lsr r4, r4, #28
#else
ubfx r4, r12, #8, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23737,9 +23840,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #12
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #16
lsr r4, r4, #28
#else
ubfx r4, r12, #12, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
@@ -23770,9 +23877,13 @@ L_GCM_gmult_len_start_block:
lsr r6, r10, #4
and r3, r11, #15
lsr r11, r11, #4
lsr r4, r12, #4
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
lsl r4, r12, #24
lsr r4, r4, #28
#else
ubfx r4, r12, #4, #4
#endif
eor r11, r11, r10, lsl #28
and r4, r4, #15
ldr r3, [lr, r3, lsl #2]
add r4, r1, r4, lsl #4
eor r10, r6, r9, lsl #28
+170 -59
View File
@@ -8993,37 +8993,36 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key,
"add %[ks], %[ks], #16\n\t"
"stm %[ks], {r4, r5, r6, r7}\n\t"
"sub %[ks], %[ks], #16\n\t"
"mov r3, r7\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
"lsl r4, r3, #16\n\t"
"lsl r4, r7, #16\n\t"
"lsr r4, r4, #24\n\t"
#else
"uxtb r4, r3, ror #8\n\t"
"uxtb r4, r7, ror #8\n\t"
#endif
#else
"ubfx r4, r3, #8, #8\n\t"
"ubfx r4, r7, #8, #8\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
"lsl r5, r3, #8\n\t"
"lsl r5, r7, #8\n\t"
"lsr r5, r5, #24\n\t"
#else
"uxtb r5, r3, ror #16\n\t"
"uxtb r5, r7, ror #16\n\t"
#endif
#else
"ubfx r5, r3, #16, #8\n\t"
"ubfx r5, r7, #16, #8\n\t"
#endif
"lsr r6, r3, #24\n\t"
"lsr r6, r7, #24\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
"lsl r3, r3, #24\n\t"
"lsl r3, r7, #24\n\t"
"lsr r3, r3, #24\n\t"
#else
"uxtb r3, r3\n\t"
"uxtb r3, r7\n\t"
#endif
#else
"ubfx r3, r3, #0, #8\n\t"
"ubfx r3, r7, #0, #8\n\t"
#endif
"ldrb r4, [r8, r4, lsl #2]\n\t"
"ldrb r6, [r8, r6, lsl #2]\n\t"
@@ -24111,14 +24110,22 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"ldr r12, [r0, #12]\n\t"
"ldr %[len], [r2, #12]\n\t"
"eor r12, r12, %[len]\n\t"
"lsr %[len], r12, #24\n\t"
"and %[len], %[len], #15\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl %[len], r12, #4\n\t"
"lsr %[len], %[len], #28\n\t"
#else
"ubfx %[len], r12, #24, #4\n\t"
#endif
"add %[len], %[m], %[len], lsl #4\n\t"
"ldm %[len], {r8, r9, r10, r11}\n\t"
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsr r4, r12, #28\n\t"
#else
"ubfx r4, r12, #28, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
@@ -24134,9 +24141,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #16\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #12\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #16, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24151,9 +24162,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #20\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #8\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #20, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24168,9 +24183,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #20\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #8, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24185,9 +24204,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #12\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #16\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #12, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24218,9 +24241,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #24\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #4, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24244,8 +24271,12 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"ldr r12, [r0, #8]\n\t"
"ldr %[len], [r2, #8]\n\t"
"eor r12, r12, %[len]\n\t"
"lsr %[len], r12, #24\n\t"
"and %[len], %[len], #15\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl %[len], r12, #4\n\t"
"lsr %[len], %[len], #28\n\t"
#else
"ubfx %[len], r12, #24, #4\n\t"
#endif
"add %[len], %[m], %[len], lsl #4\n\t"
"ldm %[len], {r4, r5, r6, r7}\n\t"
"eor r8, r8, r4\n\t"
@@ -24255,7 +24286,11 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsr r4, r12, #28\n\t"
#else
"ubfx r4, r12, #28, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
@@ -24271,9 +24306,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #16\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #12\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #16, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24288,9 +24327,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #20\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #8\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #20, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24305,9 +24348,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #20\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #8, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24322,9 +24369,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #12\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #16\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #12, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24355,9 +24406,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #24\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #4, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24381,8 +24436,12 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"ldr r12, [r0, #4]\n\t"
"ldr %[len], [r2, #4]\n\t"
"eor r12, r12, %[len]\n\t"
"lsr %[len], r12, #24\n\t"
"and %[len], %[len], #15\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl %[len], r12, #4\n\t"
"lsr %[len], %[len], #28\n\t"
#else
"ubfx %[len], r12, #24, #4\n\t"
#endif
"add %[len], %[m], %[len], lsl #4\n\t"
"ldm %[len], {r4, r5, r6, r7}\n\t"
"eor r8, r8, r4\n\t"
@@ -24392,7 +24451,11 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsr r4, r12, #28\n\t"
#else
"ubfx r4, r12, #28, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
@@ -24408,9 +24471,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #16\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #12\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #16, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24425,9 +24492,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #20\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #8\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #20, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24442,9 +24513,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #20\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #8, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24459,9 +24534,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #12\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #16\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #12, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24492,9 +24571,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #24\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #4, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24518,8 +24601,12 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"ldr r12, [r0]\n\t"
"ldr %[len], [r2]\n\t"
"eor r12, r12, %[len]\n\t"
"lsr %[len], r12, #24\n\t"
"and %[len], %[len], #15\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl %[len], r12, #4\n\t"
"lsr %[len], %[len], #28\n\t"
#else
"ubfx %[len], r12, #24, #4\n\t"
#endif
"add %[len], %[m], %[len], lsl #4\n\t"
"ldm %[len], {r4, r5, r6, r7}\n\t"
"eor r8, r8, r4\n\t"
@@ -24529,7 +24616,11 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsr r4, r12, #28\n\t"
#else
"ubfx r4, r12, #28, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
@@ -24545,9 +24636,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #16\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #12\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #16, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24562,9 +24657,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #20\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #8\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #20, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24579,9 +24678,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #20\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #8, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24596,9 +24699,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #12\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #16\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #12, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
@@ -24629,9 +24736,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x,
"lsr r6, r10, #4\n\t"
"and %[len], r11, #15\n\t"
"lsr r11, r11, #4\n\t"
"lsr r4, r12, #4\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"lsl r4, r12, #24\n\t"
"lsr r4, r4, #28\n\t"
#else
"ubfx r4, r12, #4, #4\n\t"
#endif
"eor r11, r11, r10, lsl #28\n\t"
"and r4, r4, #15\n\t"
"ldr %[len], [lr, r3, lsl #2]\n\t"
"add r4, %[m], r4, lsl #4\n\t"
"eor r10, r6, r9, lsl #28\n\t"
+11 -23
View File
@@ -155,8 +155,7 @@ fe_add_sub_op:
#endif
# Sub
sbcs r10, r4, r6
sbcs r11, r5, r7
sbc lr, lr, lr
sbc r11, r5, r7
# Add
subs r12, r12, #1
adcs r8, r4, r6
@@ -222,12 +221,9 @@ fe_add_sub_op:
#else
strd r8, r9, [r0, #24]
#endif
# Multiply -modulus by underflow
lsl r3, lr, #1
mvn lr, #18
orr r3, r3, r11, lsr #31
mul lr, r3, lr
# Sub -x*modulus (if overflow)
# Add -modulus on underflow
mov lr, #19
and lr, lr, r11, asr #31
ldm r1, {r4, r5, r6, r7, r8, r9}
subs r4, r4, lr
sbcs r5, r5, #0
@@ -263,12 +259,9 @@ fe_sub_op:
sbcs r10, r2, r10
sbcs r11, r3, r11
sbcs r12, r4, r12
sbcs lr, r5, lr
sbc r3, r3, r3
mvn r2, #18
lsl r3, r3, #1
orr r3, r3, lr, lsr #31
mul r2, r3, r2
sbc lr, r5, lr
mov r2, #19
and r2, r2, lr, asr #31
subs r6, r6, r2
sbcs r7, r7, #0
sbcs r8, r8, #0
@@ -312,13 +305,9 @@ fe_add_op:
adcs r10, r2, r10
adcs r11, r3, r11
adcs r12, r4, r12
mov r3, #0
adcs lr, r5, lr
adc r3, r3, #0
adc lr, r5, lr
mov r2, #19
lsl r3, r3, #1
orr r3, r3, lr, lsr #31
mul r2, r3, r2
and r2, r2, lr, asr #31
adds r6, r6, r2
adcs r7, r7, #0
adcs r8, r8, #0
@@ -575,6 +564,7 @@ fe_isnonzero:
fe_isnegative:
push {r4, r5, lr}
ldm r0!, {r2, r3, r4, r5}
and r12, r2, #1
adds r1, r2, #19
adcs r1, r3, #0
adcs r1, r4, #0
@@ -583,11 +573,9 @@ fe_isnegative:
adcs r1, r2, #0
adcs r1, r3, #0
adcs r1, r4, #0
ldr r2, [r0, #-16]
adc r1, r5, #0
and r0, r2, #1
lsr r1, r1, #31
eor r0, r0, r1
eor r0, r12, r1
pop {r4, r5, pc}
.size fe_isnegative,.-fe_isnegative
#if defined(HAVE_ED25519_MAKE_KEY) || defined(HAVE_ED25519_SIGN) || defined(WOLFSSL_CURVE25519_USE_ED25519)
+28 -40
View File
@@ -59,9 +59,9 @@
#if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL)
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_init(void)
WC_OMIT_FRAME_POINTER void fe_init()
#else
WC_OMIT_FRAME_POINTER void fe_init(void)
WC_OMIT_FRAME_POINTER void fe_init()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
@@ -81,9 +81,9 @@ WC_OMIT_FRAME_POINTER void fe_init(void)
void fe_add_sub_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_add_sub_op(void)
WC_OMIT_FRAME_POINTER void fe_add_sub_op()
#else
WC_OMIT_FRAME_POINTER void fe_add_sub_op(void)
WC_OMIT_FRAME_POINTER void fe_add_sub_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
@@ -198,8 +198,7 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op(void)
#endif
/* Sub */
"sbcs r10, r4, r6\n\t"
"sbcs r11, r5, r7\n\t"
"sbc lr, lr, lr\n\t"
"sbc r11, r5, r7\n\t"
/* Add */
"subs r12, r12, #1\n\t"
"adcs r8, r4, r6\n\t"
@@ -265,12 +264,9 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op(void)
#else
"strd r8, r9, [r0, #24]\n\t"
#endif
/* Multiply -modulus by underflow */
"lsl r3, lr, #1\n\t"
"mvn lr, #18\n\t"
"orr r3, r3, r11, lsr #31\n\t"
"mul lr, r3, lr\n\t"
/* Sub -x*modulus (if overflow) */
/* Add -modulus on underflow */
"mov lr, #19\n\t"
"and lr, lr, r11, asr #31\n\t"
"ldm r1, {r4, r5, r6, r7, r8, r9}\n\t"
"subs r4, r4, lr\n\t"
"sbcs r5, r5, #0\n\t"
@@ -300,9 +296,9 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op(void)
void fe_sub_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_sub_op(void)
WC_OMIT_FRAME_POINTER void fe_sub_op()
#else
WC_OMIT_FRAME_POINTER void fe_sub_op(void)
WC_OMIT_FRAME_POINTER void fe_sub_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
@@ -319,12 +315,9 @@ WC_OMIT_FRAME_POINTER void fe_sub_op(void)
"sbcs r10, r2, r10\n\t"
"sbcs r11, r3, r11\n\t"
"sbcs r12, r4, r12\n\t"
"sbcs lr, r5, lr\n\t"
"sbc r3, r3, r3\n\t"
"mvn r2, #18\n\t"
"lsl r3, r3, #1\n\t"
"orr r3, r3, lr, lsr #31\n\t"
"mul r2, r3, r2\n\t"
"sbc lr, r5, lr\n\t"
"mov r2, #19\n\t"
"and r2, r2, lr, asr #31\n\t"
"subs r6, r6, r2\n\t"
"sbcs r7, r7, #0\n\t"
"sbcs r8, r8, #0\n\t"
@@ -379,9 +372,9 @@ WC_OMIT_FRAME_POINTER void fe_sub(fe r, const fe a, const fe b)
void fe_add_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_add_op(void)
WC_OMIT_FRAME_POINTER void fe_add_op()
#else
WC_OMIT_FRAME_POINTER void fe_add_op(void)
WC_OMIT_FRAME_POINTER void fe_add_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
@@ -398,13 +391,9 @@ WC_OMIT_FRAME_POINTER void fe_add_op(void)
"adcs r10, r2, r10\n\t"
"adcs r11, r3, r11\n\t"
"adcs r12, r4, r12\n\t"
"mov r3, #0\n\t"
"adcs lr, r5, lr\n\t"
"adc r3, r3, #0\n\t"
"adc lr, r5, lr\n\t"
"mov r2, #19\n\t"
"lsl r3, r3, #1\n\t"
"orr r3, r3, lr, lsr #31\n\t"
"mul r2, r3, r2\n\t"
"and r2, r2, lr, asr #31\n\t"
"adds r6, r6, r2\n\t"
"adcs r7, r7, #0\n\t"
"adcs r8, r8, #0\n\t"
@@ -797,6 +786,7 @@ WC_OMIT_FRAME_POINTER int fe_isnegative(const fe a)
__asm__ __volatile__ (
"ldm %[a]!, {r2, r3, r4, r5}\n\t"
"and r12, r2, #1\n\t"
"adds r1, r2, #19\n\t"
"adcs r1, r3, #0\n\t"
"adcs r1, r4, #0\n\t"
@@ -805,11 +795,9 @@ WC_OMIT_FRAME_POINTER int fe_isnegative(const fe a)
"adcs r1, r2, #0\n\t"
"adcs r1, r3, #0\n\t"
"adcs r1, r4, #0\n\t"
"ldr r2, [%[a], #-16]\n\t"
"adc r1, r5, #0\n\t"
"and %[a], r2, #1\n\t"
"lsr r1, r1, #31\n\t"
"eor %[a], %[a], r1\n\t"
"eor %[a], r12, r1\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [a] "+r" (a)
:
@@ -817,7 +805,7 @@ WC_OMIT_FRAME_POINTER int fe_isnegative(const fe a)
:
: [a] "r" (a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r1", "r2", "r3", "r4", "r5"
: "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r12"
);
return (word32)(size_t)a;
}
@@ -2510,9 +2498,9 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
void fe_mul_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_mul_op(void)
WC_OMIT_FRAME_POINTER void fe_mul_op()
#else
WC_OMIT_FRAME_POINTER void fe_mul_op(void)
WC_OMIT_FRAME_POINTER void fe_mul_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
@@ -2905,9 +2893,9 @@ WC_OMIT_FRAME_POINTER void fe_mul_op(void)
#else
void fe_mul_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_mul_op(void)
WC_OMIT_FRAME_POINTER void fe_mul_op()
#else
WC_OMIT_FRAME_POINTER void fe_mul_op(void)
WC_OMIT_FRAME_POINTER void fe_mul_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
@@ -3086,9 +3074,9 @@ WC_OMIT_FRAME_POINTER void fe_mul(fe r, const fe a, const fe b)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
void fe_sq_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_sq_op(void)
WC_OMIT_FRAME_POINTER void fe_sq_op()
#else
WC_OMIT_FRAME_POINTER void fe_sq_op(void)
WC_OMIT_FRAME_POINTER void fe_sq_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
@@ -3374,9 +3362,9 @@ WC_OMIT_FRAME_POINTER void fe_sq_op(void)
#else
void fe_sq_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void fe_sq_op(void)
WC_OMIT_FRAME_POINTER void fe_sq_op()
#else
WC_OMIT_FRAME_POINTER void fe_sq_op(void)
WC_OMIT_FRAME_POINTER void fe_sq_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
+38 -46
View File
@@ -5736,7 +5736,7 @@ L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done:
eor x14, x14, x14
ubfiz x24, x4, #3, #32
lsl x24, x4, #3
mov v28.d[0], x14
mov v28.d[1], x24
rev64 v28.16b, v28.16b
@@ -7099,10 +7099,10 @@ L_aes_gcm_encrypt_arm64_crypto_192_start_zero:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_192_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -8637,10 +8637,10 @@ L_aes_gcm_encrypt_arm64_crypto_256_start_zero:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_256_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
aese v14.16b, v0.16b
@@ -9944,10 +9944,10 @@ L_aes_gcm_encrypt_arm64_crypto_128_start_zero:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_128_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -10537,7 +10537,7 @@ L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes:
# Done GHASH
L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done:
eor x14, x14, x14
ubfiz x24, x4, #3, #32
lsl x24, x4, #3
mov v28.d[0], x14
mov v28.d[1], x24
rev64 v28.16b, v28.16b
@@ -11893,10 +11893,10 @@ L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte:
L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes:
L_aes_gcm_decrypt_arm64_crypto_192_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -11945,7 +11945,6 @@ L_aes_gcm_decrypt_arm64_crypto_192_partial_done:
ld1 {v28.16b}, [x5]
b L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded
L_aes_gcm_decrypt_arm64_crypto_192_part_tag:
ubfiz x6, x6, #0, #32
eor v28.16b, v28.16b, v28.16b
mov x17, x6
st1 {v28.2d}, [x11]
@@ -13447,10 +13446,10 @@ L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte:
L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes:
L_aes_gcm_decrypt_arm64_crypto_256_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
aese v14.16b, v0.16b
@@ -13507,7 +13506,6 @@ L_aes_gcm_decrypt_arm64_crypto_256_partial_done:
ld1 {v28.16b}, [x5]
b L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded
L_aes_gcm_decrypt_arm64_crypto_256_part_tag:
ubfiz x6, x6, #0, #32
eor v28.16b, v28.16b, v28.16b
mov x17, x6
st1 {v28.2d}, [x11]
@@ -14770,10 +14768,10 @@ L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte:
L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes:
L_aes_gcm_decrypt_arm64_crypto_128_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -14818,7 +14816,6 @@ L_aes_gcm_decrypt_arm64_crypto_128_partial_done:
ld1 {v28.16b}, [x5]
b L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded
L_aes_gcm_decrypt_arm64_crypto_128_part_tag:
ubfiz x6, x6, #0, #32
eor v28.16b, v28.16b, v28.16b
mov x17, x6
st1 {v28.2d}, [x11]
@@ -15367,7 +15364,7 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done:
eor x14, x14, x14
ubfiz x24, x4, #3, #32
lsl x24, x4, #3
mov v28.d[0], x14
mov v28.d[1], x24
rev64 v28.16b, v28.16b
@@ -16701,10 +16698,10 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -18210,10 +18207,10 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
aese v14.16b, v0.16b
@@ -19488,10 +19485,10 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero:
# Done GHASH
L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -20059,7 +20056,7 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes:
# Done GHASH
L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done:
eor x14, x14, x14
ubfiz x24, x4, #3, #32
lsl x24, x4, #3
mov v28.d[0], x14
mov v28.d[1], x24
rev64 v28.16b, v28.16b
@@ -21386,10 +21383,10 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte:
L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes:
L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -21437,7 +21434,6 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done:
ld1 {v28.16b}, [x5]
b L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded
L_aes_gcm_decrypt_arm64_crypto_eor3_192_part_tag:
ubfiz x6, x6, #0, #32
eor v28.16b, v28.16b, v28.16b
mov x17, x6
st1 {v28.2d}, [x11]
@@ -22911,10 +22907,10 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte:
L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes:
L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
aese v14.16b, v0.16b
@@ -22970,7 +22966,6 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done:
ld1 {v28.16b}, [x5]
b L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded
L_aes_gcm_decrypt_arm64_crypto_eor3_256_part_tag:
ubfiz x6, x6, #0, #32
eor v28.16b, v28.16b, v28.16b
mov x17, x6
st1 {v28.2d}, [x11]
@@ -24205,10 +24200,10 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte:
L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes:
L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done:
ld1 {v14.2d}, [x12]
ubfiz x8, x8, #3, #32
lsl x8, x8, #3
rbit x8, x8
mov v28.d[0], x8
ubfiz x2, x2, #3, #32
lsl x2, x2, #3
rbit x2, x2
mov v28.d[1], x2
eor v26.16b, v26.16b, v28.16b
@@ -24252,7 +24247,6 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done:
ld1 {v28.16b}, [x5]
b L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded
L_aes_gcm_decrypt_arm64_crypto_eor3_128_part_tag:
ubfiz x6, x6, #0, #32
eor v28.16b, v28.16b, v28.16b
mov x17, x6
st1 {v28.2d}, [x11]
@@ -24426,7 +24420,7 @@ L_aes_gcm_init_arm64_crypto_end_bytes:
# Done GHASH
L_aes_gcm_init_arm64_crypto_partial_done:
eor x7, x7, x7
ubfiz x13, x3, #3, #32
lsl x13, x3, #3
mov v7.d[0], x7
mov v7.d[1], x13
rev64 v7.16b, v7.16b
@@ -28788,10 +28782,10 @@ _AES_GCM_encrypt_final_AARCH64:
ld1 {v4.2d}, [x5]
ushr v6.2d, v6.2d, #56
ld1 {v7.2d}, [x6]
ubfiz x4, x4, #3, #32
lsl x4, x4, #3
rbit x4, x4
mov v0.d[0], x4
ubfiz x3, x3, #3, #32
lsl x3, x3, #3
rbit x3, x3
mov v0.d[1], x3
eor v5.16b, v5.16b, v0.16b
@@ -32674,10 +32668,10 @@ _AES_GCM_decrypt_final_AARCH64:
ld1 {v4.2d}, [x5]
ushr v6.2d, v6.2d, #56
ld1 {v7.2d}, [x6]
ubfiz x4, x4, #3, #32
lsl x4, x4, #3
rbit x4, x4
mov v0.d[0], x4
ubfiz x3, x3, #3, #32
lsl x3, x3, #3
rbit x3, x3
mov v0.d[1], x3
eor v5.16b, v5.16b, v0.16b
@@ -32702,7 +32696,6 @@ _AES_GCM_decrypt_final_AARCH64:
ld1 {v0.16b}, [x1]
b L_aes_gcm_decrypt_final_arm64_crypto_tag_loaded
L_aes_gcm_decrypt_final_arm64_crypto_part_tag:
ubfiz x2, x2, #0, #32
eor v0.16b, v0.16b, v0.16b
mov x10, x2
st1 {v0.2d}, [x0]
@@ -32863,7 +32856,7 @@ L_aes_gcm_init_arm64_crypto_eor3_end_bytes:
# Done GHASH
L_aes_gcm_init_arm64_crypto_eor3_partial_done:
eor x7, x7, x7
ubfiz x13, x3, #3, #32
lsl x13, x3, #3
mov v7.d[0], x7
mov v7.d[1], x13
rev64 v7.16b, v7.16b
@@ -37121,10 +37114,10 @@ _AES_GCM_encrypt_final_AARCH64_EOR3:
ld1 {v4.2d}, [x5]
ushr v6.2d, v6.2d, #56
ld1 {v7.2d}, [x6]
ubfiz x4, x4, #3, #32
lsl x4, x4, #3
rbit x4, x4
mov v0.d[0], x4
ubfiz x3, x3, #3, #32
lsl x3, x3, #3
rbit x3, x3
mov v0.d[1], x3
eor v5.16b, v5.16b, v0.16b
@@ -40922,10 +40915,10 @@ _AES_GCM_decrypt_final_AARCH64_EOR3:
ld1 {v4.2d}, [x5]
ushr v6.2d, v6.2d, #56
ld1 {v7.2d}, [x6]
ubfiz x4, x4, #3, #32
lsl x4, x4, #3
rbit x4, x4
mov v0.d[0], x4
ubfiz x3, x3, #3, #32
lsl x3, x3, #3
rbit x3, x3
mov v0.d[1], x3
eor v5.16b, v5.16b, v0.16b
@@ -40949,7 +40942,6 @@ _AES_GCM_decrypt_final_AARCH64_EOR3:
ld1 {v0.16b}, [x1]
b L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_loaded
L_aes_gcm_decrypt_final_arm64_crypto_eor3_part_tag:
ubfiz x2, x2, #0, #32
eor v0.16b, v0.16b, v0.16b
mov x10, x2
st1 {v0.2d}, [x0]
File diff suppressed because it is too large Load Diff
+5 -5
View File
@@ -38,7 +38,7 @@
#if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL)
#include <wolfssl/wolfcrypt/fe_operations.h>
void fe_init(void)
void fe_init()
{
__asm__ __volatile__ (
"\n\t"
@@ -229,8 +229,8 @@ int fe_isnonzero(const fe a)
"orr %x[a], x1, x2\n\t"
"orr x3, x3, x4\n\t"
"orr %x[a], %x[a], x3\n\t"
: [a] "+r" (a)
:
: [a] "r" (a)
: "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6"
);
return (word32)(size_t)a;
@@ -248,8 +248,8 @@ int fe_isnegative(const fe a)
"adc x5, x4, xzr\n\t"
"and %x[a], x1, #1\n\t"
"eor %x[a], %x[a], x5, lsr 63\n\t"
: [a] "+r" (a)
:
: [a] "r" (a)
: "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6"
);
return (word32)(size_t)a;
@@ -4362,7 +4362,7 @@ int curve25519_base(byte* r, const byte* n)
/* Store */
"stp x14, x15, [%x[r]]\n\t"
"stp x16, x17, [%x[r], #16]\n\t"
"mov %x[r], xzr\n\t"
"mov x0, xzr\n\t"
"ldp x29, x30, [sp], #0xb0\n\t"
: [r] "+r" (r)
: [n] "r" (n), [x2] "r" (x2)
@@ -6969,7 +6969,7 @@ int curve25519(byte* r, const byte* n, const byte* a)
/* Store */
"stp x14, x15, [%x[r]]\n\t"
"stp x16, x17, [%x[r], #16]\n\t"
"mov %x[r], xzr\n\t"
"mov x0, xzr\n\t"
"ldp x29, x30, [sp], #0xc0\n\t"
: [r] "+r" (r)
: [n] "r" (n), [a] "r" (a)
+6 -6
View File
@@ -8406,11 +8406,11 @@ int mlkem_cmp_neon(const byte* a, const byte* b, int sz)
"orr v8.16b, v8.16b, v10.16b\n\t"
"ext v9.16b, v8.16b, v8.16b, #8\n\t"
"orr v8.16b, v8.16b, v9.16b\n\t"
"mov %x[a], v8.d[0]\n\t"
"subs %x[a], %x[a], xzr\n\t"
"csetm %w[a], ne\n\t"
: [a] "+r" (a), [sz] "+r" (sz)
: [b] "r" (b)
"mov x0, v8.d[0]\n\t"
"subs x0, x0, xzr\n\t"
"csetm w0, ne\n\t"
: [sz] "+r" (sz)
: [a] "r" (a), [b] "r" (b)
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
"v9", "v10", "v11"
);
@@ -9089,7 +9089,7 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r,
"b L_mlkem_rej_uniform_loop_lt_4_%=\n\t"
"\n"
"L_mlkem_rej_uniform_done_%=:\n\t"
"mov %x[p], x12\n\t"
"mov x0, x12\n\t"
: [p] "+r" (p), [len] "+r" (len), [rLen] "+r" (rLen)
: [r] "r" (r), [mask] "r" (mask), [q] "r" (q), [bits] "r" (bits),
[indices] "r" (indices)
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+33 -33
View File
@@ -39,7 +39,7 @@
.type wc_chacha_setiv, %function
wc_chacha_setiv:
PUSH {r4, r5, r6, lr}
ADD r3, r0, #0x34
ADD r3, r0, #52
LDR r4, [r1]
LDR r5, [r1, #4]
LDR r6, [r1, #8]
@@ -76,7 +76,7 @@ L_chacha_thumb2_constants:
wc_chacha_setkey:
PUSH {r4, r5, r6, r7, lr}
ADR r7, L_chacha_thumb2_constants
SUBS r2, r2, #0x10
SUBS r2, r2, #16
ADD r7, r7, r2
/* Start state with constants */
LDM r7, {r3, r4, r5, r6}
@@ -116,7 +116,7 @@ L_chacha_thumb2_setkey_same_key_bytes:
.type wc_chacha_crypt_bytes, %function
wc_chacha_crypt_bytes:
PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
SUB sp, sp, #0x34
SUB sp, sp, #52
MOV lr, r0
STRD r0, r1, [sp, #32]
STRD r2, r3, [sp, #40]
@@ -129,7 +129,7 @@ L_chacha_thumb2_crypt_block:
/* Load x[0]..x[12] into registers. */
LDM lr, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12}
/* 10x 2 full rounds to perform. */
MOV lr, #0xa
MOV lr, #10
STR lr, [sp, #48]
L_chacha_thumb2_crypt_loop:
/* 0, 4, 8, 12 */
@@ -248,7 +248,7 @@ L_chacha_thumb2_crypt_loop:
STR lr, [sp, #20]
/* Check if we have done enough rounds. */
LDR lr, [sp, #48]
SUBS lr, lr, #0x1
SUBS lr, lr, #1
STR lr, [sp, #48]
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BGT L_chacha_thumb2_crypt_loop
@@ -283,7 +283,7 @@ L_chacha_thumb2_crypt_loop:
LDM lr!, {r10, r11}
ADD r8, r8, r10
ADD r9, r9, r11
ADD r10, r10, #0x1
ADD r10, r10, #1
STM r12!, {r8, r9}
STR r10, [lr, #-8]
LDM r12, {r8, r9}
@@ -388,7 +388,7 @@ L_chacha_thumb2_crypt_lt_block:
STR r12, [lr, #64]
ADD lr, lr, #0x44
L_chacha_thumb2_crypt_16byte_loop:
CMP r3, #0x10
CMP r3, #16
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BLT L_chacha_thumb2_crypt_word_loop
#else
@@ -404,7 +404,7 @@ L_chacha_thumb2_crypt_16byte_loop:
EOR r9, r9, r5
EOR r10, r10, r6
EOR r11, r11, r7
SUBS r3, r3, #0x10
SUBS r3, r3, #16
STR r8, [r1]
STR r9, [r1, #4]
STR r10, [r1, #8]
@@ -414,15 +414,15 @@ L_chacha_thumb2_crypt_16byte_loop:
#else
BEQ.N L_chacha_thumb2_crypt_done
#endif
ADD r2, r2, #0x10
ADD r1, r1, #0x10
ADD r2, r2, #16
ADD r1, r1, #16
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
B L_chacha_thumb2_crypt_16byte_loop
#else
B.N L_chacha_thumb2_crypt_16byte_loop
#endif
L_chacha_thumb2_crypt_word_loop:
CMP r3, #0x4
CMP r3, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BLT L_chacha_thumb2_crypt_byte_start
#else
@@ -432,16 +432,16 @@ L_chacha_thumb2_crypt_word_loop:
LDR r4, [lr]
LDR r8, [r2]
EOR r8, r8, r4
SUBS r3, r3, #0x4
SUBS r3, r3, #4
STR r8, [r1]
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_chacha_thumb2_crypt_done
#else
BEQ.N L_chacha_thumb2_crypt_done
#endif
ADD lr, lr, #0x4
ADD r2, r2, #0x4
ADD r1, r1, #0x4
ADD lr, lr, #4
ADD r2, r2, #4
ADD r1, r1, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
B L_chacha_thumb2_crypt_word_loop
#else
@@ -452,7 +452,7 @@ L_chacha_thumb2_crypt_byte_start:
L_chacha_thumb2_crypt_byte_loop:
LDRB r8, [r2]
EOR r8, r8, r4
SUBS r3, r3, #0x1
SUBS r3, r3, #1
STRB r8, [r1]
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_chacha_thumb2_crypt_done
@@ -460,15 +460,15 @@ L_chacha_thumb2_crypt_byte_loop:
BEQ.N L_chacha_thumb2_crypt_done
#endif
LSR r4, r4, #8
ADD r2, r2, #0x1
ADD r1, r1, #0x1
ADD r2, r2, #1
ADD r1, r1, #1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
B L_chacha_thumb2_crypt_byte_loop
#else
B.N L_chacha_thumb2_crypt_byte_loop
#endif
L_chacha_thumb2_crypt_done:
ADD sp, sp, #0x34
ADD sp, sp, #52
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 508 */
.size wc_chacha_crypt_bytes,.-wc_chacha_crypt_bytes
@@ -479,7 +479,7 @@ L_chacha_thumb2_crypt_done:
wc_chacha_use_over:
PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
L_chacha_thumb2_over_16byte_loop:
CMP r3, #0x10
CMP r3, #16
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BLT L_chacha_thumb2_over_word_loop
#else
@@ -498,7 +498,7 @@ L_chacha_thumb2_over_16byte_loop:
EOR r5, r5, r9
EOR r6, r6, r10
EOR r7, r7, r11
SUBS r3, r3, #0x10
SUBS r3, r3, #16
STR r4, [r1]
STR r5, [r1, #4]
STR r6, [r1, #8]
@@ -508,16 +508,16 @@ L_chacha_thumb2_over_16byte_loop:
#else
BEQ.N L_chacha_thumb2_over_done
#endif
ADD r0, r0, #0x10
ADD r2, r2, #0x10
ADD r1, r1, #0x10
ADD r0, r0, #16
ADD r2, r2, #16
ADD r1, r1, #16
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
B L_chacha_thumb2_over_16byte_loop
#else
B.N L_chacha_thumb2_over_16byte_loop
#endif
L_chacha_thumb2_over_word_loop:
CMP r3, #0x4
CMP r3, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BLT L_chacha_thumb2_over_byte_loop
#else
@@ -527,16 +527,16 @@ L_chacha_thumb2_over_word_loop:
LDR r4, [r0]
LDR r8, [r2]
EOR r4, r4, r8
SUBS r3, r3, #0x4
SUBS r3, r3, #4
STR r4, [r1]
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_chacha_thumb2_over_done
#else
BEQ.N L_chacha_thumb2_over_done
#endif
ADD r0, r0, #0x4
ADD r2, r2, #0x4
ADD r1, r1, #0x4
ADD r0, r0, #4
ADD r2, r2, #4
ADD r1, r1, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
B L_chacha_thumb2_over_word_loop
#else
@@ -547,16 +547,16 @@ L_chacha_thumb2_over_byte_loop:
LDRB r4, [r0]
LDRB r8, [r2]
EOR r4, r4, r8
SUBS r3, r3, #0x1
SUBS r3, r3, #1
STRB r4, [r1]
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_chacha_thumb2_over_done
#else
BEQ.N L_chacha_thumb2_over_done
#endif
ADD r0, r0, #0x1
ADD r2, r2, #0x1
ADD r1, r1, #0x1
ADD r0, r0, #1
ADD r2, r2, #1
ADD r1, r1, #1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
B L_chacha_thumb2_over_byte_loop
#else
+33 -33
View File
@@ -65,7 +65,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setiv(word32* x, const byte* iv,
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"ADD r3, %[x], #0x34\n\t"
"ADD r3, %[x], #52\n\t"
"LDR r4, [%[iv]]\n\t"
"LDR r5, [%[iv], #4]\n\t"
"LDR r6, [%[iv], #8]\n\t"
@@ -113,7 +113,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key,
__asm__ __volatile__ (
"MOV r7, %[L_chacha_thumb2_constants]\n\t"
"SUBS %[keySz], %[keySz], #0x10\n\t"
"SUBS %[keySz], %[keySz], #16\n\t"
"ADD r7, r7, %[keySz]\n\t"
/* Start state with constants */
"LDM r7, {r3, r4, r5, r6}\n\t"
@@ -180,7 +180,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"SUB sp, sp, #0x34\n\t"
"SUB sp, sp, #52\n\t"
"MOV lr, %[ctx]\n\t"
"STRD %[ctx], %[c], [sp, #32]\n\t"
"STRD %[m], %[len], [sp, #40]\n\t"
@@ -198,7 +198,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
/* Load x[0]..x[12] into registers. */
"LDM lr, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12}\n\t"
/* 10x 2 full rounds to perform. */
"MOV lr, #0xa\n\t"
"MOV lr, #10\n\t"
"STR lr, [sp, #48]\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -322,7 +322,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"STR lr, [sp, #20]\n\t"
/* Check if we have done enough rounds. */
"LDR lr, [sp, #48]\n\t"
"SUBS lr, lr, #0x1\n\t"
"SUBS lr, lr, #1\n\t"
"STR lr, [sp, #48]\n\t"
#if defined(__GNUC__)
"BGT L_chacha_thumb2_crypt_loop_%=\n\t"
@@ -359,7 +359,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"LDM lr!, {r10, r11}\n\t"
"ADD r8, r8, r10\n\t"
"ADD r9, r9, r11\n\t"
"ADD r10, r10, #0x1\n\t"
"ADD r10, r10, #1\n\t"
"STM r12!, {r8, r9}\n\t"
"STR r10, [lr, #-8]\n\t"
"LDM r12, {r8, r9}\n\t"
@@ -480,7 +480,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
#else
"L_chacha_thumb2_crypt_16byte_loop_%=:\n\t"
#endif
"CMP %[len], #0x10\n\t"
"CMP %[len], #16\n\t"
#if defined(__GNUC__)
"BLT L_chacha_thumb2_crypt_word_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -498,7 +498,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"EOR r9, r9, r5\n\t"
"EOR r10, r10, r6\n\t"
"EOR r11, r11, r7\n\t"
"SUBS %[len], %[len], #0x10\n\t"
"SUBS %[len], %[len], #16\n\t"
"STR r8, [%[c]]\n\t"
"STR r9, [%[c], #4]\n\t"
"STR r10, [%[c], #8]\n\t"
@@ -510,8 +510,8 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
#else
"BEQ.N L_chacha_thumb2_crypt_done_%=\n\t"
#endif
"ADD %[m], %[m], #0x10\n\t"
"ADD %[c], %[c], #0x10\n\t"
"ADD %[m], %[m], #16\n\t"
"ADD %[c], %[c], #16\n\t"
#if defined(__GNUC__)
"B L_chacha_thumb2_crypt_16byte_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -525,7 +525,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
#else
"L_chacha_thumb2_crypt_word_loop_%=:\n\t"
#endif
"CMP %[len], #0x4\n\t"
"CMP %[len], #4\n\t"
#if defined(__GNUC__)
"BLT L_chacha_thumb2_crypt_byte_start_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -537,7 +537,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"LDR r4, [lr]\n\t"
"LDR r8, [%[m]]\n\t"
"EOR r8, r8, r4\n\t"
"SUBS %[len], %[len], #0x4\n\t"
"SUBS %[len], %[len], #4\n\t"
"STR r8, [%[c]]\n\t"
#if defined(__GNUC__)
"BEQ L_chacha_thumb2_crypt_done_%=\n\t"
@@ -546,9 +546,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
#else
"BEQ.N L_chacha_thumb2_crypt_done_%=\n\t"
#endif
"ADD lr, lr, #0x4\n\t"
"ADD %[m], %[m], #0x4\n\t"
"ADD %[c], %[c], #0x4\n\t"
"ADD lr, lr, #4\n\t"
"ADD %[m], %[m], #4\n\t"
"ADD %[c], %[c], #4\n\t"
#if defined(__GNUC__)
"B L_chacha_thumb2_crypt_word_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -571,7 +571,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
#endif
"LDRB r8, [%[m]]\n\t"
"EOR r8, r8, r4\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"SUBS %[len], %[len], #1\n\t"
"STRB r8, [%[c]]\n\t"
#if defined(__GNUC__)
"BEQ L_chacha_thumb2_crypt_done_%=\n\t"
@@ -581,8 +581,8 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"BEQ.N L_chacha_thumb2_crypt_done_%=\n\t"
#endif
"LSR r4, r4, #8\n\t"
"ADD %[m], %[m], #0x1\n\t"
"ADD %[c], %[c], #0x1\n\t"
"ADD %[m], %[m], #1\n\t"
"ADD %[c], %[c], #1\n\t"
#if defined(__GNUC__)
"B L_chacha_thumb2_crypt_byte_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -596,7 +596,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
#else
"L_chacha_thumb2_crypt_done_%=:\n\t"
#endif
"ADD sp, sp, #0x34\n\t"
"ADD sp, sp, #52\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ctx] "+r" (ctx), [c] "+r" (c), [m] "+r" (m), [len] "+r" (len)
:
@@ -631,7 +631,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
#else
"L_chacha_thumb2_over_16byte_loop_%=:\n\t"
#endif
"CMP %[len], #0x10\n\t"
"CMP %[len], #16\n\t"
#if defined(__GNUC__)
"BLT L_chacha_thumb2_over_word_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -652,7 +652,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r10\n\t"
"EOR r7, r7, r11\n\t"
"SUBS %[len], %[len], #0x10\n\t"
"SUBS %[len], %[len], #16\n\t"
"STR r4, [%[output]]\n\t"
"STR r5, [%[output], #4]\n\t"
"STR r6, [%[output], #8]\n\t"
@@ -664,9 +664,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
#else
"BEQ.N L_chacha_thumb2_over_done_%=\n\t"
#endif
"ADD %[over], %[over], #0x10\n\t"
"ADD %[input], %[input], #0x10\n\t"
"ADD %[output], %[output], #0x10\n\t"
"ADD %[over], %[over], #16\n\t"
"ADD %[input], %[input], #16\n\t"
"ADD %[output], %[output], #16\n\t"
#if defined(__GNUC__)
"B L_chacha_thumb2_over_16byte_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -680,7 +680,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
#else
"L_chacha_thumb2_over_word_loop_%=:\n\t"
#endif
"CMP %[len], #0x4\n\t"
"CMP %[len], #4\n\t"
#if defined(__GNUC__)
"BLT L_chacha_thumb2_over_byte_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -692,7 +692,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
"LDR r4, [%[over]]\n\t"
"LDR r8, [%[input]]\n\t"
"EOR r4, r4, r8\n\t"
"SUBS %[len], %[len], #0x4\n\t"
"SUBS %[len], %[len], #4\n\t"
"STR r4, [%[output]]\n\t"
#if defined(__GNUC__)
"BEQ L_chacha_thumb2_over_done_%=\n\t"
@@ -701,9 +701,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
#else
"BEQ.N L_chacha_thumb2_over_done_%=\n\t"
#endif
"ADD %[over], %[over], #0x4\n\t"
"ADD %[input], %[input], #0x4\n\t"
"ADD %[output], %[output], #0x4\n\t"
"ADD %[over], %[over], #4\n\t"
"ADD %[input], %[input], #4\n\t"
"ADD %[output], %[output], #4\n\t"
#if defined(__GNUC__)
"B L_chacha_thumb2_over_word_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -721,7 +721,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
"LDRB r4, [%[over]]\n\t"
"LDRB r8, [%[input]]\n\t"
"EOR r4, r4, r8\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"SUBS %[len], %[len], #1\n\t"
"STRB r4, [%[output]]\n\t"
#if defined(__GNUC__)
"BEQ L_chacha_thumb2_over_done_%=\n\t"
@@ -730,9 +730,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output,
#else
"BEQ.N L_chacha_thumb2_over_done_%=\n\t"
#endif
"ADD %[over], %[over], #0x1\n\t"
"ADD %[input], %[input], #0x1\n\t"
"ADD %[output], %[output], #0x1\n\t"
"ADD %[over], %[over], #1\n\t"
"ADD %[input], %[input], #1\n\t"
"ADD %[output], %[output], #1\n\t"
#if defined(__GNUC__)
"B L_chacha_thumb2_over_byte_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+54 -54
View File
@@ -69,13 +69,13 @@ L_mlkem_thumb2_ntt_zetas:
.type mlkem_thumb2_ntt, %function
mlkem_thumb2_ntt:
PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
SUB sp, sp, #0x8
SUB sp, sp, #8
ADR r1, L_mlkem_thumb2_ntt_zetas
#ifndef WOLFSSL_ARM_ARCH_7M
MOV r12, #0xd01
MOVT r12, #0xcff
#endif /* !WOLFSSL_ARM_ARCH_7M */
MOV r2, #0x10
MOV r2, #16
L_mlkem_thumb2_ntt_loop_123:
STR r2, [sp]
LDRH lr, [r1, #2]
@@ -507,19 +507,19 @@ L_mlkem_thumb2_ntt_loop_123:
STR r8, [r0, #384]
STR r9, [r0, #448]
LDR r2, [sp]
SUBS r2, r2, #0x1
ADD r0, r0, #0x4
SUBS r2, r2, #1
ADD r0, r0, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_thumb2_ntt_loop_123
#else
BNE.N L_mlkem_thumb2_ntt_loop_123
#endif
SUB r0, r0, #0x40
MOV r3, #0x0
MOV r3, #0
L_mlkem_thumb2_ntt_loop_4_j:
STR r3, [sp, #4]
ADD lr, r1, r3, LSR #4
MOV r2, #0x4
MOV r2, #4
LDR lr, [lr, #16]
L_mlkem_thumb2_ntt_loop_4_i:
STR r2, [sp]
@@ -676,8 +676,8 @@ L_mlkem_thumb2_ntt_loop_4_i:
STR r8, [r0, #96]
STR r9, [r0, #112]
LDRD r2, r3, [sp]
SUBS r2, r2, #0x1
ADD r0, r0, #0x4
SUBS r2, r2, #1
ADD r0, r0, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_thumb2_ntt_loop_4_i
#else
@@ -692,7 +692,7 @@ L_mlkem_thumb2_ntt_loop_4_i:
BNE.N L_mlkem_thumb2_ntt_loop_4_j
#endif
SUB r0, r0, #0x200
MOV r3, #0x0
MOV r3, #0
L_mlkem_thumb2_ntt_loop_567:
ADD lr, r1, r3, LSR #3
STR r3, [sp, #4]
@@ -1310,15 +1310,15 @@ L_mlkem_thumb2_ntt_loop_567:
STR r8, [r0, #24]
STR r9, [r0, #28]
LDR r3, [sp, #4]
ADD r3, r3, #0x10
ADD r3, r3, #16
RSBS r10, r3, #0x100
ADD r0, r0, #0x20
ADD r0, r0, #32
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_thumb2_ntt_loop_567
#else
BNE.N L_mlkem_thumb2_ntt_loop_567
#endif
ADD sp, sp, #0x8
ADD sp, sp, #8
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 1270 */
.size mlkem_thumb2_ntt,.-mlkem_thumb2_ntt
@@ -1358,13 +1358,13 @@ L_mlkem_invntt_zetas_inv:
.type mlkem_thumb2_invntt, %function
mlkem_thumb2_invntt:
PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
SUB sp, sp, #0x8
SUB sp, sp, #8
ADR r1, L_mlkem_invntt_zetas_inv
#ifndef WOLFSSL_ARM_ARCH_7M
MOV r12, #0xd01
MOVT r12, #0xcff
#endif /* !WOLFSSL_ARM_ARCH_7M */
MOV r3, #0x0
MOV r3, #0
L_mlkem_invntt_loop_765:
ADD lr, r1, r3, LSR #1
STR r3, [sp, #4]
@@ -1953,20 +1953,20 @@ L_mlkem_invntt_loop_765:
STR r8, [r0, #24]
STR r9, [r0, #28]
LDR r3, [sp, #4]
ADD r3, r3, #0x10
ADD r3, r3, #16
RSBS r10, r3, #0x100
ADD r0, r0, #0x20
ADD r0, r0, #32
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_invntt_loop_765
#else
BNE.N L_mlkem_invntt_loop_765
#endif
SUB r0, r0, #0x200
MOV r3, #0x0
MOV r3, #0
L_mlkem_invntt_loop_4_j:
STR r3, [sp, #4]
ADD lr, r1, r3, LSR #4
MOV r2, #0x4
MOV r2, #4
LDR lr, [lr, #224]
L_mlkem_invntt_loop_4_i:
STR r2, [sp]
@@ -2143,8 +2143,8 @@ L_mlkem_invntt_loop_4_i:
STR r8, [r0, #96]
STR r9, [r0, #112]
LDRD r2, r3, [sp]
SUBS r2, r2, #0x1
ADD r0, r0, #0x4
SUBS r2, r2, #1
ADD r0, r0, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_invntt_loop_4_i
#else
@@ -2159,7 +2159,7 @@ L_mlkem_invntt_loop_4_i:
BNE.N L_mlkem_invntt_loop_4_j
#endif
SUB r0, r0, #0x200
MOV r2, #0x10
MOV r2, #16
L_mlkem_invntt_loop_321:
STR r2, [sp]
LDRH lr, [r1, #2]
@@ -2967,14 +2967,14 @@ L_mlkem_invntt_loop_321:
STR r8, [r0, #384]
STR r9, [r0, #448]
LDR r2, [sp]
SUBS r2, r2, #0x1
ADD r0, r0, #0x4
SUBS r2, r2, #1
ADD r0, r0, #4
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_invntt_loop_321
#else
BNE.N L_mlkem_invntt_loop_321
#endif
ADD sp, sp, #0x8
ADD sp, sp, #8
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 1629 */
.size mlkem_thumb2_invntt,.-mlkem_thumb2_invntt
@@ -3020,12 +3020,12 @@ mlkem_thumb2_basemul_mont:
MOV r12, #0xd01
MOVT r12, #0xcff
#endif /* !WOLFSSL_ARM_ARCH_7M */
MOV r8, #0x0
MOV r8, #0
L_mlkem_basemul_mont_loop:
LDM r1!, {r4, r5}
LDM r2!, {r6, r7}
LDR lr, [r3, r8]
ADD r8, r8, #0x2
ADD r8, r8, #2
PUSH {r8}
CMP r8, #0x80
#ifndef WOLFSSL_ARM_ARCH_7M
@@ -3035,7 +3035,7 @@ L_mlkem_basemul_mont_loop:
SMULTB r11, r12, r10
SMLABB r8, r12, r9, r8
SMLABB r10, r12, r11, r10
RSB r11, lr, #0x0
RSB r11, lr, #0
SMULBT r8, lr, r8
SMULBT r10, r11, r10
SMLABB r8, r4, r6, r8
@@ -3071,7 +3071,7 @@ L_mlkem_basemul_mont_loop:
SBFX r11, r11, #0, #16
MLA r8, r12, r9, r8
MLA r10, r12, r11, r10
RSB r11, lr, #0x0
RSB r11, lr, #0
SBFX r9, lr, #0, #16
SBFX r11, r11, #0, #16
ASR r8, r8, #16
@@ -3143,12 +3143,12 @@ mlkem_thumb2_basemul_mont_add:
MOV r12, #0xd01
MOVT r12, #0xcff
#endif /* !WOLFSSL_ARM_ARCH_7M */
MOV r8, #0x0
MOV r8, #0
L_mlkem_thumb2_basemul_mont_add_loop:
LDM r1!, {r4, r5}
LDM r2!, {r6, r7}
LDR lr, [r3, r8]
ADD r8, r8, #0x2
ADD r8, r8, #2
PUSH {r8}
CMP r8, #0x80
#ifndef WOLFSSL_ARM_ARCH_7M
@@ -3158,7 +3158,7 @@ L_mlkem_thumb2_basemul_mont_add_loop:
SMULTB r11, r12, r10
SMLABB r8, r12, r9, r8
SMLABB r10, r12, r11, r10
RSB r11, lr, #0x0
RSB r11, lr, #0
SMULBT r8, lr, r8
SMULBT r10, r11, r10
SMLABB r8, r4, r6, r8
@@ -3197,7 +3197,7 @@ L_mlkem_thumb2_basemul_mont_add_loop:
SBFX r11, r11, #0, #16
MLA r8, r12, r9, r8
MLA r10, r12, r11, r10
RSB r11, lr, #0x0
RSB r11, lr, #0
SBFX r9, lr, #0, #16
SBFX r11, r11, #0, #16
ASR r8, r8, #16
@@ -3346,7 +3346,7 @@ L_mlkem_thumb2_csubq_loop:
BFI r5, r10, #0, #16
#endif /* !WOLFSSL_ARM_ARCH_7M */
STM r0!, {r2, r3, r4, r5}
SUBS r1, r1, #0x8
SUBS r1, r1, #8
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_thumb2_csubq_loop
#else
@@ -3362,9 +3362,9 @@ L_mlkem_thumb2_csubq_loop:
mlkem_thumb2_rej_uniform:
PUSH {r4, r5, r6, r7, r8, r9, r10, lr}
MOV r8, #0xd01
MOV r9, #0x0
MOV r9, #0
L_mlkem_thumb2_rej_uniform_loop_no_fail:
CMP r1, #0x8
CMP r1, #8
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BLT L_mlkem_thumb2_rej_uniform_done_no_fail
#else
@@ -3421,7 +3421,7 @@ L_mlkem_thumb2_rej_uniform_loop_no_fail:
LSR r10, r10, #31
SUB r1, r1, r10
ADD r9, r9, r10, LSL #1
SUBS r3, r3, #0xc
SUBS r3, r3, #12
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_mlkem_thumb2_rej_uniform_loop_no_fail
#else
@@ -3433,7 +3433,7 @@ L_mlkem_thumb2_rej_uniform_loop_no_fail:
B.N L_mlkem_thumb2_rej_uniform_done
#endif
L_mlkem_thumb2_rej_uniform_done_no_fail:
CMP r1, #0x0
CMP r1, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3449,8 +3449,8 @@ L_mlkem_thumb2_rej_uniform_loop:
BGE.N L_mlkem_thumb2_rej_uniform_fail_0
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3465,8 +3465,8 @@ L_mlkem_thumb2_rej_uniform_fail_0:
BGE.N L_mlkem_thumb2_rej_uniform_fail_1
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3482,8 +3482,8 @@ L_mlkem_thumb2_rej_uniform_fail_1:
BGE.N L_mlkem_thumb2_rej_uniform_fail_2
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3498,8 +3498,8 @@ L_mlkem_thumb2_rej_uniform_fail_2:
BGE.N L_mlkem_thumb2_rej_uniform_fail_3
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3514,8 +3514,8 @@ L_mlkem_thumb2_rej_uniform_fail_3:
BGE.N L_mlkem_thumb2_rej_uniform_fail_4
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3531,8 +3531,8 @@ L_mlkem_thumb2_rej_uniform_fail_4:
BGE.N L_mlkem_thumb2_rej_uniform_fail_5
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3547,8 +3547,8 @@ L_mlkem_thumb2_rej_uniform_fail_5:
BGE.N L_mlkem_thumb2_rej_uniform_fail_6
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
@@ -3563,15 +3563,15 @@ L_mlkem_thumb2_rej_uniform_fail_6:
BGE.N L_mlkem_thumb2_rej_uniform_fail_7
#endif
STRH r7, [r0, r9]
SUBS r1, r1, #0x1
ADD r9, r9, #0x2
SUBS r1, r1, #1
ADD r9, r9, #2
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_mlkem_thumb2_rej_uniform_done
#else
BEQ.N L_mlkem_thumb2_rej_uniform_done
#endif
L_mlkem_thumb2_rej_uniform_fail_7:
SUBS r3, r3, #0xc
SUBS r3, r3, #12
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BGT L_mlkem_thumb2_rej_uniform_loop
#else
+54 -54
View File
@@ -85,13 +85,13 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"SUB sp, sp, #0x8\n\t"
"SUB sp, sp, #8\n\t"
"MOV r1, %[L_mlkem_thumb2_ntt_zetas]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"MOV r12, #0xd01\n\t"
"MOVT r12, #0xcff\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
"MOV r2, #0x10\n\t"
"MOV r2, #16\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_ntt_loop_123:\n\t"
@@ -528,8 +528,8 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
"STR r8, [%[r], #384]\n\t"
"STR r9, [%[r], #448]\n\t"
"LDR r2, [sp]\n\t"
"SUBS r2, r2, #0x1\n\t"
"ADD %[r], %[r], #0x4\n\t"
"SUBS r2, r2, #1\n\t"
"ADD %[r], %[r], #4\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_ntt_loop_123_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -538,7 +538,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
"BNE.N L_mlkem_thumb2_ntt_loop_123_%=\n\t"
#endif
"SUB %[r], %[r], #0x40\n\t"
"MOV r3, #0x0\n\t"
"MOV r3, #0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_ntt_loop_4_j:\n\t"
@@ -547,7 +547,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
#endif
"STR r3, [sp, #4]\n\t"
"ADD lr, r1, r3, LSR #4\n\t"
"MOV r2, #0x4\n\t"
"MOV r2, #4\n\t"
"LDR lr, [lr, #16]\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -709,8 +709,8 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
"STR r8, [%[r], #96]\n\t"
"STR r9, [%[r], #112]\n\t"
"LDRD r2, r3, [sp]\n\t"
"SUBS r2, r2, #0x1\n\t"
"ADD %[r], %[r], #0x4\n\t"
"SUBS r2, r2, #1\n\t"
"ADD %[r], %[r], #4\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_ntt_loop_4_i_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -729,7 +729,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
"BNE.N L_mlkem_thumb2_ntt_loop_4_j_%=\n\t"
#endif
"SUB %[r], %[r], #0x200\n\t"
"MOV r3, #0x0\n\t"
"MOV r3, #0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_ntt_loop_567:\n\t"
@@ -1352,9 +1352,9 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
"STR r8, [%[r], #24]\n\t"
"STR r9, [%[r], #28]\n\t"
"LDR r3, [sp, #4]\n\t"
"ADD r3, r3, #0x10\n\t"
"ADD r3, r3, #16\n\t"
"RSBS r10, r3, #0x100\n\t"
"ADD %[r], %[r], #0x20\n\t"
"ADD %[r], %[r], #32\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_ntt_loop_567_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1362,7 +1362,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
#else
"BNE.N L_mlkem_thumb2_ntt_loop_567_%=\n\t"
#endif
"ADD sp, sp, #0x8\n\t"
"ADD sp, sp, #8\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r),
[L_mlkem_thumb2_ntt_zetas] "+r" (L_mlkem_thumb2_ntt_zetas_c)
@@ -1412,13 +1412,13 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"SUB sp, sp, #0x8\n\t"
"SUB sp, sp, #8\n\t"
"MOV r1, %[L_mlkem_invntt_zetas_inv]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"MOV r12, #0xd01\n\t"
"MOVT r12, #0xcff\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
"MOV r3, #0x0\n\t"
"MOV r3, #0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_invntt_loop_765:\n\t"
@@ -2012,9 +2012,9 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
"STR r8, [%[r], #24]\n\t"
"STR r9, [%[r], #28]\n\t"
"LDR r3, [sp, #4]\n\t"
"ADD r3, r3, #0x10\n\t"
"ADD r3, r3, #16\n\t"
"RSBS r10, r3, #0x100\n\t"
"ADD %[r], %[r], #0x20\n\t"
"ADD %[r], %[r], #32\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_invntt_loop_765_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2023,7 +2023,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
"BNE.N L_mlkem_invntt_loop_765_%=\n\t"
#endif
"SUB %[r], %[r], #0x200\n\t"
"MOV r3, #0x0\n\t"
"MOV r3, #0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_invntt_loop_4_j:\n\t"
@@ -2032,7 +2032,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
#endif
"STR r3, [sp, #4]\n\t"
"ADD lr, r1, r3, LSR #4\n\t"
"MOV r2, #0x4\n\t"
"MOV r2, #4\n\t"
"LDR lr, [lr, #224]\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2214,8 +2214,8 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
"STR r8, [%[r], #96]\n\t"
"STR r9, [%[r], #112]\n\t"
"LDRD r2, r3, [sp]\n\t"
"SUBS r2, r2, #0x1\n\t"
"ADD %[r], %[r], #0x4\n\t"
"SUBS r2, r2, #1\n\t"
"ADD %[r], %[r], #4\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_invntt_loop_4_i_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2234,7 +2234,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
"BNE.N L_mlkem_invntt_loop_4_j_%=\n\t"
#endif
"SUB %[r], %[r], #0x200\n\t"
"MOV r2, #0x10\n\t"
"MOV r2, #16\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_invntt_loop_321:\n\t"
@@ -3047,8 +3047,8 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
"STR r8, [%[r], #384]\n\t"
"STR r9, [%[r], #448]\n\t"
"LDR r2, [sp]\n\t"
"SUBS r2, r2, #0x1\n\t"
"ADD %[r], %[r], #0x4\n\t"
"SUBS r2, r2, #1\n\t"
"ADD %[r], %[r], #4\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_invntt_loop_321_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3056,7 +3056,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
#else
"BNE.N L_mlkem_invntt_loop_321_%=\n\t"
#endif
"ADD sp, sp, #0x8\n\t"
"ADD sp, sp, #8\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r),
[L_mlkem_invntt_zetas_inv] "+r" (L_mlkem_invntt_zetas_inv_c)
@@ -3116,7 +3116,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r,
"MOV r12, #0xd01\n\t"
"MOVT r12, #0xcff\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
"MOV r8, #0x0\n\t"
"MOV r8, #0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_basemul_mont_loop:\n\t"
@@ -3126,7 +3126,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r,
"LDM %[a]!, {r4, r5}\n\t"
"LDM %[b]!, {r6, r7}\n\t"
"LDR lr, [r3, r8]\n\t"
"ADD r8, r8, #0x2\n\t"
"ADD r8, r8, #2\n\t"
"PUSH {r8}\n\t"
"CMP r8, #0x80\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
@@ -3136,7 +3136,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r,
"SMULTB r11, r12, r10\n\t"
"SMLABB r8, r12, r9, r8\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"RSB r11, lr, #0x0\n\t"
"RSB r11, lr, #0\n\t"
"SMULBT r8, lr, r8\n\t"
"SMULBT r10, r11, r10\n\t"
"SMLABB r8, r4, r6, r8\n\t"
@@ -3172,7 +3172,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r,
"SBFX r11, r11, #0, #16\n\t"
"MLA r8, r12, r9, r8\n\t"
"MLA r10, r12, r11, r10\n\t"
"RSB r11, lr, #0x0\n\t"
"RSB r11, lr, #0\n\t"
"SBFX r9, lr, #0, #16\n\t"
"SBFX r11, r11, #0, #16\n\t"
"ASR r8, r8, #16\n\t"
@@ -3271,7 +3271,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r,
"MOV r12, #0xd01\n\t"
"MOVT r12, #0xcff\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
"MOV r8, #0x0\n\t"
"MOV r8, #0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_basemul_mont_add_loop:\n\t"
@@ -3281,7 +3281,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r,
"LDM %[a]!, {r4, r5}\n\t"
"LDM %[b]!, {r6, r7}\n\t"
"LDR lr, [r3, r8]\n\t"
"ADD r8, r8, #0x2\n\t"
"ADD r8, r8, #2\n\t"
"PUSH {r8}\n\t"
"CMP r8, #0x80\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
@@ -3291,7 +3291,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r,
"SMULTB r11, r12, r10\n\t"
"SMLABB r8, r12, r9, r8\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"RSB r11, lr, #0x0\n\t"
"RSB r11, lr, #0\n\t"
"SMULBT r8, lr, r8\n\t"
"SMULBT r10, r11, r10\n\t"
"SMLABB r8, r4, r6, r8\n\t"
@@ -3330,7 +3330,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r,
"SBFX r11, r11, #0, #16\n\t"
"MLA r8, r12, r9, r8\n\t"
"MLA r10, r12, r11, r10\n\t"
"RSB r11, lr, #0x0\n\t"
"RSB r11, lr, #0\n\t"
"SBFX r9, lr, #0, #16\n\t"
"SBFX r11, r11, #0, #16\n\t"
"ASR r8, r8, #16\n\t"
@@ -3507,7 +3507,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_csubq(sword16* p)
"BFI r5, r10, #0, #16\n\t"
#endif /* !WOLFSSL_ARM_ARCH_7M */
"STM %[p]!, {r2, r3, r4, r5}\n\t"
"SUBS r1, r1, #0x8\n\t"
"SUBS r1, r1, #8\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_csubq_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3551,14 +3551,14 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
__asm__ __volatile__ (
"MOV r8, #0xd01\n\t"
"MOV r9, #0x0\n\t"
"MOV r9, #0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_rej_uniform_loop_no_fail:\n\t"
#else
"L_mlkem_thumb2_rej_uniform_loop_no_fail_%=:\n\t"
#endif
"CMP %[len], #0x8\n\t"
"CMP %[len], #8\n\t"
#if defined(__GNUC__)
"BLT L_mlkem_thumb2_rej_uniform_done_no_fail_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3617,7 +3617,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"LSR r10, r10, #31\n\t"
"SUB %[len], %[len], r10\n\t"
"ADD r9, r9, r10, LSL #1\n\t"
"SUBS %[rLen], %[rLen], #0xc\n\t"
"SUBS %[rLen], %[rLen], #12\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_rej_uniform_loop_no_fail_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3638,7 +3638,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
#else
"L_mlkem_thumb2_rej_uniform_done_no_fail_%=:\n\t"
#endif
"CMP %[len], #0x0\n\t"
"CMP %[len], #0\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3663,8 +3663,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_0_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3688,8 +3688,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_1_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3714,8 +3714,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_2_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3739,8 +3739,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_3_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3764,8 +3764,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_4_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3790,8 +3790,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_5_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3815,8 +3815,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_6_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3840,8 +3840,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
"BGE.N L_mlkem_thumb2_rej_uniform_fail_7_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
"SUBS %[len], %[len], #1\n\t"
"ADD r9, r9, #2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -3855,7 +3855,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
#else
"L_mlkem_thumb2_rej_uniform_fail_7_%=:\n\t"
#endif
"SUBS %[rLen], %[rLen], #0xc\n\t"
"SUBS %[rLen], %[rLen], #12\n\t"
#if defined(__GNUC__)
"BGT L_mlkem_thumb2_rej_uniform_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
+29 -29
View File
@@ -39,17 +39,17 @@
.type poly1305_blocks_thumb2_16, %function
poly1305_blocks_thumb2_16:
PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
SUB sp, sp, #0x1c
CMP r2, #0x0
SUB sp, sp, #28
CMP r2, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_poly1305_thumb2_16_done
#else
BEQ.N L_poly1305_thumb2_16_done
#endif
ADD lr, sp, #0xc
ADD lr, sp, #12
STM lr, {r0, r1, r2, r3}
/* Get h pointer */
ADD lr, r0, #0x10
ADD lr, r0, #16
LDM lr, {r4, r5, r6, r7, r8}
L_poly1305_thumb2_16_loop:
/* Add m to h */
@@ -63,7 +63,7 @@ L_poly1305_thumb2_16_loop:
ADCS r5, r5, r3
ADCS r6, r6, r9
ADCS r7, r7, r10
ADD r1, r1, #0x10
ADD r1, r1, #16
ADC r8, r8, r11
#ifdef WOLFSSL_ARM_ARCH_7M
STM lr, {r4, r5, r6, r7, r8}
@@ -205,7 +205,7 @@ L_poly1305_thumb2_16_loop:
LDR r5, [lr, #16]
/* r[3] * h[3] */
UMAAL r10, r11, r3, r4
MOV r12, #0x0
MOV r12, #0
/* r[0] * h[4] */
UMAAL r8, r12, r0, r5
/* r[1] * h[4] */
@@ -221,8 +221,8 @@ L_poly1305_thumb2_16_loop:
/* Load length */
LDR r2, [sp, #20]
/* Reduce mod 2^130 - 5 */
BIC r3, r8, #0x3
AND r8, r8, #0x3
BIC r3, r8, #3
AND r8, r8, #3
ADDS r4, r4, r3
LSR r3, r3, #2
ADCS r5, r5, r9
@@ -241,7 +241,7 @@ L_poly1305_thumb2_16_loop:
ADCS r7, r7, r11
ADC r8, r8, r12
/* Sub 16 from length. */
SUBS r2, r2, #0x10
SUBS r2, r2, #16
/* Store length. */
STR r2, [sp, #20]
/* Loop again if more message to do. */
@@ -252,7 +252,7 @@ L_poly1305_thumb2_16_loop:
#endif
STM lr, {r4, r5, r6, r7, r8}
L_poly1305_thumb2_16_done:
ADD sp, sp, #0x1c
ADD sp, sp, #28
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 250 */
.size poly1305_blocks_thumb2_16,.-poly1305_blocks_thumb2_16
@@ -285,7 +285,7 @@ poly1305_set_key:
LDR r3, [r1, #20]
LDR r4, [r1, #24]
LDR r5, [r1, #28]
ADD r10, r0, #0x24
ADD r10, r0, #36
STM r10, {r2, r3, r4, r5}
/* Load, mask and store r. */
LDR r2, [r1]
@@ -296,14 +296,14 @@ poly1305_set_key:
AND r3, r3, r7
AND r4, r4, r8
AND r5, r5, r9
ADD r10, r0, #0x0
ADD r10, r0, #0
STM r10, {r2, r3, r4, r5}
/* h (accumulator) = 0 */
EOR r6, r6, r6
EOR r7, r7, r7
EOR r8, r8, r8
EOR r9, r9, r9
ADD r10, r0, #0x10
ADD r10, r0, #16
EOR r5, r5, r5
STM r10, {r5, r6, r7, r8, r9}
/* Zero leftover */
@@ -317,25 +317,25 @@ poly1305_set_key:
.type poly1305_final, %function
poly1305_final:
PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
ADD r11, r0, #0x10
ADD r11, r0, #16
LDM r11, {r2, r3, r4, r5, r6}
/* Add 5 and check for h larger than p. */
ADDS r7, r2, #0x5
ADCS r7, r3, #0x0
ADCS r7, r4, #0x0
ADCS r7, r5, #0x0
ADC r7, r6, #0x0
SUB r7, r7, #0x4
ADDS r7, r2, #5
ADCS r7, r3, #0
ADCS r7, r4, #0
ADCS r7, r5, #0
ADC r7, r6, #0
SUB r7, r7, #4
LSR r7, r7, #31
SUB r7, r7, #0x1
AND r7, r7, #0x5
SUB r7, r7, #1
AND r7, r7, #5
/* Add 0/5 to h. */
ADDS r2, r2, r7
ADCS r3, r3, #0x0
ADCS r4, r4, #0x0
ADC r5, r5, #0x0
ADCS r3, r3, #0
ADCS r4, r4, #0
ADC r5, r5, #0
/* Add padding */
ADD r11, r0, #0x24
ADD r11, r0, #36
LDM r11, {r7, r8, r9, r10}
ADDS r2, r2, r7
ADCS r3, r3, r8
@@ -352,13 +352,13 @@ poly1305_final:
EOR r4, r4, r4
EOR r5, r5, r5
EOR r6, r6, r6
ADD r11, r0, #0x10
ADD r11, r0, #16
STM r11, {r2, r3, r4, r5, r6}
/* Zero out r. */
ADD r11, r0, #0x0
ADD r11, r0, #0
STM r11, {r2, r3, r4, r5}
/* Zero out padding. */
ADD r11, r0, #0x24
ADD r11, r0, #36
STM r11, {r2, r3, r4, r5}
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
/* Cycle Count = 82 */
+29 -29
View File
@@ -66,8 +66,8 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"SUB sp, sp, #0x1c\n\t"
"CMP %[len], #0x0\n\t"
"SUB sp, sp, #28\n\t"
"CMP %[len], #0\n\t"
#if defined(__GNUC__)
"BEQ L_poly1305_thumb2_16_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -75,10 +75,10 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
#else
"BEQ.N L_poly1305_thumb2_16_done_%=\n\t"
#endif
"ADD lr, sp, #0xc\n\t"
"ADD lr, sp, #12\n\t"
"STM lr, {%[ctx], %[m], %[len], %[notLast]}\n\t"
/* Get h pointer */
"ADD lr, %[ctx], #0x10\n\t"
"ADD lr, %[ctx], #16\n\t"
"LDM lr, {r4, r5, r6, r7, r8}\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -97,7 +97,7 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
"ADCS r5, r5, %[notLast]\n\t"
"ADCS r6, r6, r9\n\t"
"ADCS r7, r7, r10\n\t"
"ADD %[m], %[m], #0x10\n\t"
"ADD %[m], %[m], #16\n\t"
"ADC r8, r8, r11\n\t"
#ifdef WOLFSSL_ARM_ARCH_7M
"STM lr, {r4, r5, r6, r7, r8}\n\t"
@@ -239,7 +239,7 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
"LDR r5, [lr, #16]\n\t"
/* r[3] * h[3] */
"UMAAL r10, r11, %[notLast], r4\n\t"
"MOV r12, #0x0\n\t"
"MOV r12, #0\n\t"
/* r[0] * h[4] */
"UMAAL r8, r12, %[ctx], r5\n\t"
/* r[1] * h[4] */
@@ -255,8 +255,8 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
/* Load length */
"LDR %[len], [sp, #20]\n\t"
/* Reduce mod 2^130 - 5 */
"BIC %[notLast], r8, #0x3\n\t"
"AND r8, r8, #0x3\n\t"
"BIC %[notLast], r8, #3\n\t"
"AND r8, r8, #3\n\t"
"ADDS r4, r4, %[notLast]\n\t"
"LSR %[notLast], %[notLast], #2\n\t"
"ADCS r5, r5, r9\n\t"
@@ -275,7 +275,7 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
"ADCS r7, r7, r11\n\t"
"ADC r8, r8, r12\n\t"
/* Sub 16 from length. */
"SUBS %[len], %[len], #0x10\n\t"
"SUBS %[len], %[len], #16\n\t"
/* Store length. */
"STR %[len], [sp, #20]\n\t"
/* Loop again if more message to do. */
@@ -293,7 +293,7 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
#else
"L_poly1305_thumb2_16_done_%=:\n\t"
#endif
"ADD sp, sp, #0x1c\n\t"
"ADD sp, sp, #28\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len),
[notLast] "+r" (notLast)
@@ -337,7 +337,7 @@ WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx, const byte* key)
"LDR r3, [%[key], #20]\n\t"
"LDR r4, [%[key], #24]\n\t"
"LDR r5, [%[key], #28]\n\t"
"ADD r10, %[ctx], #0x24\n\t"
"ADD r10, %[ctx], #36\n\t"
"STM r10, {r2, r3, r4, r5}\n\t"
/* Load, mask and store r. */
"LDR r2, [%[key]]\n\t"
@@ -348,14 +348,14 @@ WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx, const byte* key)
"AND r3, r3, r7\n\t"
"AND r4, r4, r8\n\t"
"AND r5, r5, r9\n\t"
"ADD r10, %[ctx], #0x0\n\t"
"ADD r10, %[ctx], #0\n\t"
"STM r10, {r2, r3, r4, r5}\n\t"
/* h (accumulator) = 0 */
"EOR r6, r6, r6\n\t"
"EOR r7, r7, r7\n\t"
"EOR r8, r8, r8\n\t"
"EOR r9, r9, r9\n\t"
"ADD r10, %[ctx], #0x10\n\t"
"ADD r10, %[ctx], #16\n\t"
"EOR r5, r5, r5\n\t"
"STM r10, {r5, r6, r7, r8, r9}\n\t"
/* Zero leftover */
@@ -385,25 +385,25 @@ WC_OMIT_FRAME_POINTER void poly1305_final(Poly1305* ctx, byte* mac)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
__asm__ __volatile__ (
"ADD r11, %[ctx], #0x10\n\t"
"ADD r11, %[ctx], #16\n\t"
"LDM r11, {r2, r3, r4, r5, r6}\n\t"
/* Add 5 and check for h larger than p. */
"ADDS r7, r2, #0x5\n\t"
"ADCS r7, r3, #0x0\n\t"
"ADCS r7, r4, #0x0\n\t"
"ADCS r7, r5, #0x0\n\t"
"ADC r7, r6, #0x0\n\t"
"SUB r7, r7, #0x4\n\t"
"ADDS r7, r2, #5\n\t"
"ADCS r7, r3, #0\n\t"
"ADCS r7, r4, #0\n\t"
"ADCS r7, r5, #0\n\t"
"ADC r7, r6, #0\n\t"
"SUB r7, r7, #4\n\t"
"LSR r7, r7, #31\n\t"
"SUB r7, r7, #0x1\n\t"
"AND r7, r7, #0x5\n\t"
"SUB r7, r7, #1\n\t"
"AND r7, r7, #5\n\t"
/* Add 0/5 to h. */
"ADDS r2, r2, r7\n\t"
"ADCS r3, r3, #0x0\n\t"
"ADCS r4, r4, #0x0\n\t"
"ADC r5, r5, #0x0\n\t"
"ADCS r3, r3, #0\n\t"
"ADCS r4, r4, #0\n\t"
"ADC r5, r5, #0\n\t"
/* Add padding */
"ADD r11, %[ctx], #0x24\n\t"
"ADD r11, %[ctx], #36\n\t"
"LDM r11, {r7, r8, r9, r10}\n\t"
"ADDS r2, r2, r7\n\t"
"ADCS r3, r3, r8\n\t"
@@ -420,13 +420,13 @@ WC_OMIT_FRAME_POINTER void poly1305_final(Poly1305* ctx, byte* mac)
"EOR r4, r4, r4\n\t"
"EOR r5, r5, r5\n\t"
"EOR r6, r6, r6\n\t"
"ADD r11, %[ctx], #0x10\n\t"
"ADD r11, %[ctx], #16\n\t"
"STM r11, {r2, r3, r4, r5, r6}\n\t"
/* Zero out r. */
"ADD r11, %[ctx], #0x0\n\t"
"ADD r11, %[ctx], #0\n\t"
"STM r11, {r2, r3, r4, r5}\n\t"
/* Zero out padding. */
"ADD r11, %[ctx], #0x24\n\t"
"ADD r11, %[ctx], #36\n\t"
"STM r11, {r2, r3, r4, r5}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [ctx] "+r" (ctx), [mac] "+r" (mac)
+21 -21
View File
@@ -128,7 +128,7 @@ L_SHA256_transform_len_begin:
LDR r4, [r0, #8]
EOR r11, r11, r4
#ifndef WOLFSSL_ARMASM_SHA256_SMALL
MOV r3, #0x3
MOV r3, #3
/* Start of 16 rounds */
L_SHA256_transform_len_start_fast:
/* Round 0 */
@@ -884,7 +884,7 @@ L_SHA256_transform_len_start_fast:
ADD r9, r9, r4
STR r9, [sp, #60]
ADD r12, r12, #0x40
SUBS r3, r3, #0x1
SUBS r3, r3, #1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_SHA256_transform_len_start_fast
#else
@@ -1403,10 +1403,10 @@ L_SHA256_transform_len_start_fast:
STR r8, [r0, #16]
STR r9, [r0]
#else
MOV r3, #0x4
MOV r3, #4
/* Start of 16 rounds */
L_SHA256_transform_len_start_small:
SUB r3, r3, #0x1
SUB r3, r3, #1
/* Round 0 */
LDR r5, [r0, #16]
LDR r6, [r0, #20]
@@ -1439,7 +1439,7 @@ L_SHA256_transform_len_start_small:
ADD r9, r9, r11
STR r8, [r0, #12]
STR r9, [r0, #28]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_0
#else
@@ -1493,7 +1493,7 @@ L_SHA256_transform_len_blk_end_0:
ADD r9, r9, r10
STR r8, [r0, #8]
STR r9, [r0, #24]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_1
#else
@@ -1547,7 +1547,7 @@ L_SHA256_transform_len_blk_end_1:
ADD r9, r9, r11
STR r8, [r0, #4]
STR r9, [r0, #20]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_2
#else
@@ -1601,7 +1601,7 @@ L_SHA256_transform_len_blk_end_2:
ADD r9, r9, r10
STR r8, [r0]
STR r9, [r0, #16]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_3
#else
@@ -1655,7 +1655,7 @@ L_SHA256_transform_len_blk_end_3:
ADD r9, r9, r11
STR r8, [r0, #28]
STR r9, [r0, #12]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_4
#else
@@ -1709,7 +1709,7 @@ L_SHA256_transform_len_blk_end_4:
ADD r9, r9, r10
STR r8, [r0, #24]
STR r9, [r0, #8]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_5
#else
@@ -1763,7 +1763,7 @@ L_SHA256_transform_len_blk_end_5:
ADD r9, r9, r11
STR r8, [r0, #20]
STR r9, [r0, #4]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_6
#else
@@ -1817,7 +1817,7 @@ L_SHA256_transform_len_blk_end_6:
ADD r9, r9, r10
STR r8, [r0, #16]
STR r9, [r0]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_7
#else
@@ -1871,7 +1871,7 @@ L_SHA256_transform_len_blk_end_7:
ADD r9, r9, r11
STR r8, [r0, #12]
STR r9, [r0, #28]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_8
#else
@@ -1925,7 +1925,7 @@ L_SHA256_transform_len_blk_end_8:
ADD r9, r9, r10
STR r8, [r0, #8]
STR r9, [r0, #24]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_9
#else
@@ -1979,7 +1979,7 @@ L_SHA256_transform_len_blk_end_9:
ADD r9, r9, r11
STR r8, [r0, #4]
STR r9, [r0, #20]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_10
#else
@@ -2033,7 +2033,7 @@ L_SHA256_transform_len_blk_end_10:
ADD r9, r9, r10
STR r8, [r0]
STR r9, [r0, #16]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_11
#else
@@ -2087,7 +2087,7 @@ L_SHA256_transform_len_blk_end_11:
ADD r9, r9, r11
STR r8, [r0, #28]
STR r9, [r0, #12]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_12
#else
@@ -2141,7 +2141,7 @@ L_SHA256_transform_len_blk_end_12:
ADD r9, r9, r10
STR r8, [r0, #24]
STR r9, [r0, #8]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_13
#else
@@ -2195,7 +2195,7 @@ L_SHA256_transform_len_blk_end_13:
ADD r9, r9, r11
STR r8, [r0, #20]
STR r9, [r0, #4]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_14
#else
@@ -2249,7 +2249,7 @@ L_SHA256_transform_len_blk_end_14:
ADD r9, r9, r10
STR r8, [r0, #16]
STR r9, [r0]
CMP r3, #0x0
CMP r3, #0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_SHA256_transform_len_blk_end_15
#else
@@ -2271,7 +2271,7 @@ L_SHA256_transform_len_blk_end_14:
ADD r9, r9, r4
STR r9, [sp, #60]
L_SHA256_transform_len_blk_end_15:
CMP r3, #0x0
CMP r3, #0
ADD r12, r12, #0x40
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_SHA256_transform_len_start_small
+21 -21
View File
@@ -155,7 +155,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"LDR r4, [%[sha256], #8]\n\t"
"EOR r11, r11, r4\n\t"
#ifndef WOLFSSL_ARMASM_SHA256_SMALL
"MOV r3, #0x3\n\t"
"MOV r3, #3\n\t"
/* Start of 16 rounds */
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -916,7 +916,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r4\n\t"
"STR r9, [sp, #60]\n\t"
"ADD r12, r12, #0x40\n\t"
"SUBS r3, r3, #0x1\n\t"
"SUBS r3, r3, #1\n\t"
#if defined(__GNUC__)
"BNE L_SHA256_transform_len_start_fast_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1437,7 +1437,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"STR r8, [%[sha256], #16]\n\t"
"STR r9, [%[sha256]]\n\t"
#else
"MOV r3, #0x4\n\t"
"MOV r3, #4\n\t"
/* Start of 16 rounds */
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1445,7 +1445,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
#else
"L_SHA256_transform_len_start_small_%=:\n\t"
#endif
"SUB r3, r3, #0x1\n\t"
"SUB r3, r3, #1\n\t"
/* Round 0 */
"LDR r5, [%[sha256], #16]\n\t"
"LDR r6, [%[sha256], #20]\n\t"
@@ -1478,7 +1478,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #12]\n\t"
"STR r9, [%[sha256], #28]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_0_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1539,7 +1539,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256], #8]\n\t"
"STR r9, [%[sha256], #24]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_1_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1600,7 +1600,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #4]\n\t"
"STR r9, [%[sha256], #20]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_2_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1661,7 +1661,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256]]\n\t"
"STR r9, [%[sha256], #16]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_3_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1722,7 +1722,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #28]\n\t"
"STR r9, [%[sha256], #12]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_4_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1783,7 +1783,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256], #24]\n\t"
"STR r9, [%[sha256], #8]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_5_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1844,7 +1844,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #20]\n\t"
"STR r9, [%[sha256], #4]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_6_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1905,7 +1905,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256], #16]\n\t"
"STR r9, [%[sha256]]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_7_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -1966,7 +1966,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #12]\n\t"
"STR r9, [%[sha256], #28]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_8_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2027,7 +2027,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256], #8]\n\t"
"STR r9, [%[sha256], #24]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_9_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2088,7 +2088,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #4]\n\t"
"STR r9, [%[sha256], #20]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_10_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2149,7 +2149,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256]]\n\t"
"STR r9, [%[sha256], #16]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_11_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2210,7 +2210,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #28]\n\t"
"STR r9, [%[sha256], #12]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_12_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2271,7 +2271,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256], #24]\n\t"
"STR r9, [%[sha256], #8]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_13_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2332,7 +2332,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r11\n\t"
"STR r8, [%[sha256], #20]\n\t"
"STR r9, [%[sha256], #4]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_14_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2393,7 +2393,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
"ADD r9, r9, r10\n\t"
"STR r8, [%[sha256], #16]\n\t"
"STR r9, [%[sha256]]\n\t"
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
#if defined(__GNUC__)
"BEQ L_SHA256_transform_len_blk_end_15_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2422,7 +2422,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256,
#else
"L_SHA256_transform_len_blk_end_15_%=:\n\t"
#endif
"CMP r3, #0x0\n\t"
"CMP r3, #0\n\t"
"ADD r12, r12, #0x40\n\t"
#if defined(__GNUC__)
"BNE L_SHA256_transform_len_start_small_%=\n\t"
+4 -4
View File
@@ -67,7 +67,7 @@ BlockSha3:
PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr}
SUB sp, sp, #0xcc
ADR r1, L_sha3_thumb2_rt
MOV r2, #0xc
MOV r2, #12
L_sha3_thumb2_begin:
STR r2, [sp, #200]
/* Round even */
@@ -336,7 +336,7 @@ L_sha3_thumb2_begin:
STR lr, [sp, #36]
/* Get constant */
LDRD r10, r11, [r1]
ADD r1, r1, #0x8
ADD r1, r1, #8
BIC r12, r6, r4
BIC lr, r7, r5
EOR r12, r12, r2
@@ -864,7 +864,7 @@ L_sha3_thumb2_begin:
STR lr, [r0, #36]
/* Get constant */
LDRD r10, r11, [r1]
ADD r1, r1, #0x8
ADD r1, r1, #8
BIC r12, r6, r4
BIC lr, r7, r5
EOR r12, r12, r2
@@ -1127,7 +1127,7 @@ L_sha3_thumb2_begin:
STR r12, [r0, #160]
STR lr, [r0, #164]
LDR r2, [sp, #200]
SUBS r2, r2, #0x1
SUBS r2, r2, #1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_sha3_thumb2_begin
#else
+4 -4
View File
@@ -82,7 +82,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state)
__asm__ __volatile__ (
"SUB sp, sp, #0xcc\n\t"
"MOV r1, %[L_sha3_thumb2_rt]\n\t"
"MOV r2, #0xc\n\t"
"MOV r2, #12\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_sha3_thumb2_begin:\n\t"
@@ -356,7 +356,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state)
"STR lr, [sp, #36]\n\t"
/* Get constant */
"LDRD r10, r11, [r1]\n\t"
"ADD r1, r1, #0x8\n\t"
"ADD r1, r1, #8\n\t"
"BIC r12, r6, r4\n\t"
"BIC lr, r7, r5\n\t"
"EOR r12, r12, r2\n\t"
@@ -884,7 +884,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state)
"STR lr, [%[state], #36]\n\t"
/* Get constant */
"LDRD r10, r11, [r1]\n\t"
"ADD r1, r1, #0x8\n\t"
"ADD r1, r1, #8\n\t"
"BIC r12, r6, r4\n\t"
"BIC lr, r7, r5\n\t"
"EOR r12, r12, r2\n\t"
@@ -1147,7 +1147,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state)
"STR r12, [%[state], #160]\n\t"
"STR lr, [%[state], #164]\n\t"
"LDR r2, [sp, #200]\n\t"
"SUBS r2, r2, #0x1\n\t"
"SUBS r2, r2, #1\n\t"
#if defined(__GNUC__)
"BNE L_sha3_thumb2_begin_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
+2 -2
View File
@@ -217,7 +217,7 @@ L_SHA512_transform_len_begin:
LDRD r4, r5, [r0, #16]
EOR r10, r10, r4
EOR r11, r11, r5
MOV r12, #0x4
MOV r12, #4
/* Start of 16 rounds */
L_SHA512_transform_len_start:
/* Round 0 */
@@ -2205,7 +2205,7 @@ L_SHA512_transform_len_start:
ADC r5, r5, r7
STRD r4, r5, [sp, #120]
ADD r3, r3, #0x80
SUBS r12, r12, #0x1
SUBS r12, r12, #1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_SHA512_transform_len_start
#else
+2 -2
View File
@@ -244,7 +244,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_base(wc_Sha512* sha512,
"LDRD r4, r5, [%[sha512], #16]\n\t"
"EOR r10, r10, r4\n\t"
"EOR r11, r11, r5\n\t"
"MOV r12, #0x4\n\t"
"MOV r12, #4\n\t"
/* Start of 16 rounds */
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
@@ -2237,7 +2237,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_base(wc_Sha512* sha512,
"ADC r5, r5, r7\n\t"
"STRD r4, r5, [sp, #120]\n\t"
"ADD r3, r3, #0x80\n\t"
"SUBS r12, r12, #0x1\n\t"
"SUBS r12, r12, #1\n\t"
#if defined(__GNUC__)
"BNE L_SHA512_transform_len_start_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)