diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519.S b/wolfcrypt/src/port/arm/armv8-32-curve25519.S index 156116b3a..a266995e9 100644 --- a/wolfcrypt/src/port/arm/armv8-32-curve25519.S +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519.S @@ -43,17 +43,55 @@ fe_init: .type fe_frombytes, %function fe_frombytes: push {r4, r5, r6, r7, lr} - ldrd r2, r3, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r2, [r1] + ldr r3, [r1, #4] +#else + ldrd r2, r3, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1, #8] ldr lr, [r1, #12] - ldrd r4, r5, [r1, #16] - ldrd r6, r7, [r1, #24] +#else + ldrd r12, lr, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r1, #16] + ldr r5, [r1, #20] +#else + ldrd r4, r5, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #24] + ldr r7, [r1, #28] +#else + ldrd r6, r7, [r1, #24] +#endif and r7, r7, #0x7fffffff - strd r2, r3, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r2, [r0] + str r3, [r0, #4] +#else + strd r2, r3, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] - strd r4, r5, [r0, #16] - strd r6, r7, [r0, #24] +#else + strd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #24] + str r7, [r0, #28] +#else + strd r6, r7, [r0, #24] +#endif pop {r4, r5, r6, r7, pc} .size fe_frombytes,.-fe_frombytes .text @@ -62,11 +100,30 @@ fe_frombytes: .type fe_tobytes, %function fe_tobytes: push {r4, r5, r6, r7, r8, lr} - ldrd r2, r3, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r2, [r1] + ldr r3, [r1, #4] +#else + ldrd r2, r3, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1, #8] ldr lr, [r1, #12] - ldrd r4, r5, [r1, #16] - ldrd r6, r7, [r1, #24] +#else + ldrd r12, lr, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r1, #16] + ldr r5, [r1, #20] +#else + ldrd r4, r5, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #24] + ldr r7, [r1, #28] +#else + ldrd r6, r7, [r1, #24] +#endif adds r8, r2, #19 adcs r8, r3, #0 adcs r8, r12, #0 @@ -86,11 +143,30 @@ fe_tobytes: adcs r6, r6, #0 adc r7, r7, #0 and r7, r7, #0x7fffffff - strd r2, r3, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r2, [r0] + str r3, [r0, #4] +#else + strd r2, r3, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] - strd r4, r5, [r0, #16] - strd r6, r7, [r0, #24] +#else + strd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #24] + str r7, [r0, #28] +#else + strd r6, r7, [r0, #24] +#endif pop {r4, r5, r6, r7, r8, pc} .size fe_tobytes,.-fe_tobytes .text @@ -101,14 +177,30 @@ fe_1: # Set one mov r2, #1 mov r1, #0 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r2, [r0] str r1, [r0, #4] +#else + strd r2, r1, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r1, [r0, #8] str r1, [r0, #12] +#else + strd r1, r1, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r1, [r0, #16] str r1, [r0, #20] +#else + strd r1, r1, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r1, [r0, #24] str r1, [r0, #28] +#else + strd r1, r1, [r0, #24] +#endif bx lr .size fe_1,.-fe_1 .text @@ -118,14 +210,30 @@ fe_1: fe_0: # Set zero mov r1, #0 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r1, [r0] str r1, [r0, #4] +#else + strd r1, r1, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r1, [r0, #8] str r1, [r0, #12] +#else + strd r1, r1, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r1, [r0, #16] str r1, [r0, #20] +#else + strd r1, r1, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r1, [r0, #24] str r1, [r0, #28] +#else + strd r1, r1, [r0, #24] +#endif bx lr .size fe_0,.-fe_0 .text @@ -135,18 +243,54 @@ fe_0: fe_copy: push {lr} # Copy - ldrd r2, r3, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r2, [r1] + ldr r3, [r1, #4] +#else + ldrd r2, r3, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1, #8] ldr lr, [r1, #12] - strd r2, r3, [r0] +#else + ldrd r12, lr, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r2, [r0] + str r3, [r0, #4] +#else + strd r2, r3, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] - ldrd r2, r3, [r1, #16] +#else + strd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r2, [r1, #16] + ldr r3, [r1, #20] +#else + ldrd r2, r3, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1, #24] ldr lr, [r1, #28] - strd r2, r3, [r0, #16] +#else + ldrd r12, lr, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r2, [r0, #16] + str r3, [r0, #20] +#else + strd r2, r3, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #24] str lr, [r0, #28] +#else + strd r12, lr, [r0, #24] +#endif pop {pc} .size fe_copy,.-fe_copy .text @@ -156,22 +300,70 @@ fe_copy: fe_sub: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} # Sub +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1] ldr lr, [r1, #4] - ldrd r4, r5, [r1, #8] - ldrd r6, r7, [r2] - ldrd r8, r9, [r2, #8] +#else + ldrd r12, lr, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r1, #8] + ldr r5, [r1, #12] +#else + ldrd r4, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r2] + ldr r7, [r2, #4] +#else + ldrd r6, r7, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2, #8] + ldr r9, [r2, #12] +#else + ldrd r8, r9, [r2, #8] +#endif subs r6, r12, r6 sbcs r7, lr, r7 sbcs r8, r4, r8 sbcs r9, r5, r9 - strd r6, r7, [r0] - strd r8, r9, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0] + str r7, [r0, #4] +#else + strd r6, r7, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1, #16] ldr lr, [r1, #20] - ldrd r4, r5, [r1, #24] - ldrd r6, r7, [r2, #16] - ldrd r8, r9, [r2, #24] +#else + ldrd r12, lr, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r1, #24] + ldr r5, [r1, #28] +#else + ldrd r4, r5, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r2, #16] + ldr r7, [r2, #20] +#else + ldrd r6, r7, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2, #24] + ldr r9, [r2, #28] +#else + ldrd r8, r9, [r2, #24] +#endif sbcs r6, r12, r6 sbcs r7, lr, r7 sbcs r8, r4, r8 @@ -182,9 +374,18 @@ fe_sub: and r10, r3, r10 and r11, r3, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif adds r12, r12, r10 adcs lr, lr, r3 adcs r4, r4, r3 @@ -193,11 +394,30 @@ fe_sub: adcs r7, r7, r3 adcs r8, r8, r3 adc r9, r9, r11 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] - strd r4, r5, [r0, #8] - strd r6, r7, [r0, #16] - strd r8, r9, [r0, #24] +#else + strd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #16] + str r7, [r0, #20] +#else + strd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size fe_sub,.-fe_sub .text @@ -207,22 +427,70 @@ fe_sub: fe_add: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1] ldr lr, [r1, #4] - ldrd r4, r5, [r1, #8] - ldrd r6, r7, [r2] - ldrd r8, r9, [r2, #8] +#else + ldrd r12, lr, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r1, #8] + ldr r5, [r1, #12] +#else + ldrd r4, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r2] + ldr r7, [r2, #4] +#else + ldrd r6, r7, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2, #8] + ldr r9, [r2, #12] +#else + ldrd r8, r9, [r2, #8] +#endif adds r6, r12, r6 adcs r7, lr, r7 adcs r8, r4, r8 adcs r9, r5, r9 - strd r6, r7, [r0] - strd r8, r9, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0] + str r7, [r0, #4] +#else + strd r6, r7, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1, #16] ldr lr, [r1, #20] - ldrd r4, r5, [r1, #24] - ldrd r6, r7, [r2, #16] - ldrd r8, r9, [r2, #24] +#else + ldrd r12, lr, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r1, #24] + ldr r5, [r1, #28] +#else + ldrd r4, r5, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r2, #16] + ldr r7, [r2, #20] +#else + ldrd r6, r7, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2, #24] + ldr r9, [r2, #28] +#else + ldrd r8, r9, [r2, #24] +#endif adcs r6, r12, r6 adcs r7, lr, r7 adcs r8, r4, r8 @@ -233,9 +501,18 @@ fe_add: and r10, r3, r10 and r11, r3, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif subs r12, r12, r10 sbcs lr, lr, r3 sbcs r4, r4, r3 @@ -244,11 +521,30 @@ fe_add: sbcs r7, r7, r3 sbcs r8, r8, r3 sbc r9, r9, r11 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] - strd r4, r5, [r0, #8] - strd r6, r7, [r0, #16] - strd r8, r9, [r0, #24] +#else + strd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #16] + str r7, [r0, #20] +#else + strd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size fe_add,.-fe_add .text @@ -259,27 +555,69 @@ fe_neg: push {r4, r5, lr} mov r5, #-1 mov r4, #-19 - ldrd r2, r3, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r2, [r1] + ldr r3, [r1, #4] +#else + ldrd r2, r3, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1, #8] ldr lr, [r1, #12] +#else + ldrd r12, lr, [r1, #8] +#endif subs r2, r4, r2 sbcs r3, r5, r3 sbcs r12, r5, r12 sbcs lr, r5, lr - strd r2, r3, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r2, [r0] + str r3, [r0, #4] +#else + strd r2, r3, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] +#else + strd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r4, #0x7fffff + lsl r4, r4, #8 + add r4, r4, #0xff +#else mov r4, #0x7fffffff - ldrd r2, r3, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r2, [r1, #16] + ldr r3, [r1, #20] +#else + ldrd r2, r3, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1, #24] ldr lr, [r1, #28] +#else + ldrd r12, lr, [r1, #24] +#endif sbcs r2, r5, r2 sbcs r3, r5, r3 sbcs r12, r5, r12 sbc lr, r4, lr - strd r2, r3, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r2, [r0, #16] + str r3, [r0, #20] +#else + strd r2, r3, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #24] str lr, [r0, #28] +#else + strd r12, lr, [r0, #24] +#endif pop {r4, r5, pc} .size fe_neg,.-fe_neg .text @@ -288,11 +626,30 @@ fe_neg: .type fe_isnonzero, %function fe_isnonzero: push {r4, r5, r6, r7, r8, lr} - ldrd r2, r3, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r2, [r0] + ldr r3, [r0, #4] +#else + ldrd r2, r3, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] - ldrd r4, r5, [r0, #16] - ldrd r6, r7, [r0, #24] +#else + ldrd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif adds r1, r2, #19 adcs r1, r3, #0 adcs r1, r12, #0 @@ -327,16 +684,34 @@ fe_isnonzero: .type fe_isnegative, %function fe_isnegative: push {lr} - ldrd r2, r3, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r2, [r0] + ldr r3, [r0, #4] +#else + ldrd r2, r3, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] +#else + ldrd r12, lr, [r0, #8] +#endif adds r1, r2, #19 adcs r1, r3, #0 adcs r1, r12, #0 adcs r1, lr, #0 - ldrd r2, r3, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r2, [r0, #16] + ldr r3, [r0, #20] +#else + ldrd r2, r3, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] +#else + ldrd r12, lr, [r0, #24] +#endif adcs r1, r2, #0 adcs r1, r3, #0 adcs r1, r12, #0 @@ -363,25 +738,46 @@ fe_cmov_table: mov r4, #0 mov r5, #0 mov r6, #0 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #31 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1] + ldr r9, [r1, #4] +#else + ldrd r8, r9, [r1] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #32] + ldr r9, [r1, #36] +#else + ldrd r8, r9, [r1, #32] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #64] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #64] + ldr r9, [r1, #68] +#else + ldrd r8, r9, [r1, #64] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -389,25 +785,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #30 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1] + ldr r9, [r1, #4] +#else + ldrd r8, r9, [r1] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #32] + ldr r9, [r1, #36] +#else + ldrd r8, r9, [r1, #32] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #64] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #64] + ldr r9, [r1, #68] +#else + ldrd r8, r9, [r1, #64] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -415,25 +832,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #29 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1] + ldr r9, [r1, #4] +#else + ldrd r8, r9, [r1] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #32] + ldr r9, [r1, #36] +#else + ldrd r8, r9, [r1, #32] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #64] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #64] + ldr r9, [r1, #68] +#else + ldrd r8, r9, [r1, #64] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -441,25 +879,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #28 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1] + ldr r9, [r1, #4] +#else + ldrd r8, r9, [r1] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #32] + ldr r9, [r1, #36] +#else + ldrd r8, r9, [r1, #32] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #64] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #64] + ldr r9, [r1, #68] +#else + ldrd r8, r9, [r1, #64] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -467,25 +926,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #27 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1] + ldr r9, [r1, #4] +#else + ldrd r8, r9, [r1] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #32] + ldr r9, [r1, #36] +#else + ldrd r8, r9, [r1, #32] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #64] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #64] + ldr r9, [r1, #68] +#else + ldrd r8, r9, [r1, #64] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -493,25 +973,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #26 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1] + ldr r9, [r1, #4] +#else + ldrd r8, r9, [r1] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #32] + ldr r9, [r1, #36] +#else + ldrd r8, r9, [r1, #32] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #64] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #64] + ldr r9, [r1, #68] +#else + ldrd r8, r9, [r1, #64] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -519,25 +1020,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #25 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1] + ldr r9, [r1, #4] +#else + ldrd r8, r9, [r1] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #32] + ldr r9, [r1, #36] +#else + ldrd r8, r9, [r1, #32] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #64] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #64] + ldr r9, [r1, #68] +#else + ldrd r8, r9, [r1, #64] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -545,25 +1067,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #24 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1] + ldr r9, [r1, #4] +#else + ldrd r8, r9, [r1] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #32] + ldr r9, [r1, #36] +#else + ldrd r8, r9, [r1, #32] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #64] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #64] + ldr r9, [r1, #68] +#else + ldrd r8, r9, [r1, #64] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -591,12 +1134,24 @@ fe_cmov_table: eor r9, r9, r6 and r9, r9, r10 eor r6, r6, r9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r12, [r0, #4] +#else + strd r3, r12, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str lr, [r0, #32] str r4, [r0, #36] +#else + strd lr, r4, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r5, [r0, #64] str r6, [r0, #68] +#else + strd r5, r6, [r0, #64] +#endif sbfx r7, r2, #7, #1 eor r10, r2, r7 sub r10, r10, r7 @@ -606,25 +1161,46 @@ fe_cmov_table: mov r4, #0 mov r5, #0 mov r6, #0 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #31 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #8] + ldr r9, [r1, #12] +#else + ldrd r8, r9, [r1, #8] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #40] + ldr r9, [r1, #44] +#else + ldrd r8, r9, [r1, #40] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #72] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #72] + ldr r9, [r1, #76] +#else + ldrd r8, r9, [r1, #72] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -632,25 +1208,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #30 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #8] + ldr r9, [r1, #12] +#else + ldrd r8, r9, [r1, #8] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #40] + ldr r9, [r1, #44] +#else + ldrd r8, r9, [r1, #40] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #72] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #72] + ldr r9, [r1, #76] +#else + ldrd r8, r9, [r1, #72] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -658,25 +1255,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #29 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #8] + ldr r9, [r1, #12] +#else + ldrd r8, r9, [r1, #8] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #40] + ldr r9, [r1, #44] +#else + ldrd r8, r9, [r1, #40] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #72] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #72] + ldr r9, [r1, #76] +#else + ldrd r8, r9, [r1, #72] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -684,25 +1302,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #28 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #8] + ldr r9, [r1, #12] +#else + ldrd r8, r9, [r1, #8] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #40] + ldr r9, [r1, #44] +#else + ldrd r8, r9, [r1, #40] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #72] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #72] + ldr r9, [r1, #76] +#else + ldrd r8, r9, [r1, #72] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -710,25 +1349,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #27 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #8] + ldr r9, [r1, #12] +#else + ldrd r8, r9, [r1, #8] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #40] + ldr r9, [r1, #44] +#else + ldrd r8, r9, [r1, #40] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #72] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #72] + ldr r9, [r1, #76] +#else + ldrd r8, r9, [r1, #72] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -736,25 +1396,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #26 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #8] + ldr r9, [r1, #12] +#else + ldrd r8, r9, [r1, #8] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #40] + ldr r9, [r1, #44] +#else + ldrd r8, r9, [r1, #40] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #72] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #72] + ldr r9, [r1, #76] +#else + ldrd r8, r9, [r1, #72] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -762,25 +1443,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #25 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #8] + ldr r9, [r1, #12] +#else + ldrd r8, r9, [r1, #8] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #40] + ldr r9, [r1, #44] +#else + ldrd r8, r9, [r1, #40] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #72] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #72] + ldr r9, [r1, #76] +#else + ldrd r8, r9, [r1, #72] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -788,25 +1490,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #24 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #8] + ldr r9, [r1, #12] +#else + ldrd r8, r9, [r1, #8] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #40] + ldr r9, [r1, #44] +#else + ldrd r8, r9, [r1, #40] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #72] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #72] + ldr r9, [r1, #76] +#else + ldrd r8, r9, [r1, #72] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -835,12 +1558,24 @@ fe_cmov_table: eor r9, r9, r6 and r9, r9, r10 eor r6, r6, r9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #8] str r12, [r0, #12] +#else + strd r3, r12, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str lr, [r0, #40] str r4, [r0, #44] +#else + strd lr, r4, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r5, [r0, #72] str r6, [r0, #76] +#else + strd r5, r6, [r0, #72] +#endif sbfx r7, r2, #7, #1 eor r10, r2, r7 sub r10, r10, r7 @@ -850,25 +1585,46 @@ fe_cmov_table: mov r4, #0 mov r5, #0 mov r6, #0 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #31 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #16] + ldr r9, [r1, #20] +#else + ldrd r8, r9, [r1, #16] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #48] + ldr r9, [r1, #52] +#else + ldrd r8, r9, [r1, #48] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #80] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #80] + ldr r9, [r1, #84] +#else + ldrd r8, r9, [r1, #80] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -876,25 +1632,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #30 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #16] + ldr r9, [r1, #20] +#else + ldrd r8, r9, [r1, #16] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #48] + ldr r9, [r1, #52] +#else + ldrd r8, r9, [r1, #48] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #80] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #80] + ldr r9, [r1, #84] +#else + ldrd r8, r9, [r1, #80] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -902,25 +1679,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #29 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #16] + ldr r9, [r1, #20] +#else + ldrd r8, r9, [r1, #16] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #48] + ldr r9, [r1, #52] +#else + ldrd r8, r9, [r1, #48] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #80] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #80] + ldr r9, [r1, #84] +#else + ldrd r8, r9, [r1, #80] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -928,25 +1726,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #28 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #16] + ldr r9, [r1, #20] +#else + ldrd r8, r9, [r1, #16] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #48] + ldr r9, [r1, #52] +#else + ldrd r8, r9, [r1, #48] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #80] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #80] + ldr r9, [r1, #84] +#else + ldrd r8, r9, [r1, #80] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -954,25 +1773,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #27 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #16] + ldr r9, [r1, #20] +#else + ldrd r8, r9, [r1, #16] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #48] + ldr r9, [r1, #52] +#else + ldrd r8, r9, [r1, #48] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #80] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #80] + ldr r9, [r1, #84] +#else + ldrd r8, r9, [r1, #80] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -980,25 +1820,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #26 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #16] + ldr r9, [r1, #20] +#else + ldrd r8, r9, [r1, #16] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #48] + ldr r9, [r1, #52] +#else + ldrd r8, r9, [r1, #48] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #80] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #80] + ldr r9, [r1, #84] +#else + ldrd r8, r9, [r1, #80] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -1006,25 +1867,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #25 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #16] + ldr r9, [r1, #20] +#else + ldrd r8, r9, [r1, #16] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #48] + ldr r9, [r1, #52] +#else + ldrd r8, r9, [r1, #48] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #80] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #80] + ldr r9, [r1, #84] +#else + ldrd r8, r9, [r1, #80] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -1032,25 +1914,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #24 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #16] + ldr r9, [r1, #20] +#else + ldrd r8, r9, [r1, #16] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #48] + ldr r9, [r1, #52] +#else + ldrd r8, r9, [r1, #48] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #80] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #80] + ldr r9, [r1, #84] +#else + ldrd r8, r9, [r1, #80] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -1079,12 +1982,24 @@ fe_cmov_table: eor r9, r9, r6 and r9, r9, r10 eor r6, r6, r9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #16] str r12, [r0, #20] +#else + strd r3, r12, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str lr, [r0, #48] str r4, [r0, #52] +#else + strd lr, r4, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r5, [r0, #80] str r6, [r0, #84] +#else + strd r5, r6, [r0, #80] +#endif sbfx r7, r2, #7, #1 eor r10, r2, r7 sub r10, r10, r7 @@ -1094,25 +2009,46 @@ fe_cmov_table: mov r4, #0 mov r5, #0 mov r6, #0 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #31 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #24] + ldr r9, [r1, #28] +#else + ldrd r8, r9, [r1, #24] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #56] + ldr r9, [r1, #60] +#else + ldrd r8, r9, [r1, #56] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #88] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #88] + ldr r9, [r1, #92] +#else + ldrd r8, r9, [r1, #88] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -1120,25 +2056,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #30 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #24] + ldr r9, [r1, #28] +#else + ldrd r8, r9, [r1, #24] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #56] + ldr r9, [r1, #60] +#else + ldrd r8, r9, [r1, #56] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #88] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #88] + ldr r9, [r1, #92] +#else + ldrd r8, r9, [r1, #88] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -1146,25 +2103,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #29 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #24] + ldr r9, [r1, #28] +#else + ldrd r8, r9, [r1, #24] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #56] + ldr r9, [r1, #60] +#else + ldrd r8, r9, [r1, #56] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #88] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #88] + ldr r9, [r1, #92] +#else + ldrd r8, r9, [r1, #88] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -1172,25 +2150,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #28 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #24] + ldr r9, [r1, #28] +#else + ldrd r8, r9, [r1, #24] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #56] + ldr r9, [r1, #60] +#else + ldrd r8, r9, [r1, #56] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #88] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #88] + ldr r9, [r1, #92] +#else + ldrd r8, r9, [r1, #88] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -1198,25 +2197,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #27 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #24] + ldr r9, [r1, #28] +#else + ldrd r8, r9, [r1, #24] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #56] + ldr r9, [r1, #60] +#else + ldrd r8, r9, [r1, #56] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #88] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #88] + ldr r9, [r1, #92] +#else + ldrd r8, r9, [r1, #88] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -1224,25 +2244,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #26 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #24] + ldr r9, [r1, #28] +#else + ldrd r8, r9, [r1, #24] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #56] + ldr r9, [r1, #60] +#else + ldrd r8, r9, [r1, #56] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #88] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #88] + ldr r9, [r1, #92] +#else + ldrd r8, r9, [r1, #88] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -1250,25 +2291,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #25 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #24] + ldr r9, [r1, #28] +#else + ldrd r8, r9, [r1, #24] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #56] + ldr r9, [r1, #60] +#else + ldrd r8, r9, [r1, #56] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #88] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #88] + ldr r9, [r1, #92] +#else + ldrd r8, r9, [r1, #88] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -1276,25 +2338,46 @@ fe_cmov_table: eor r5, r5, r8 eor r6, r6, r9 add r1, r1, #0x60 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r7, #0x800000 + lsl r7, r7, #8 + add r7, r7, #0x0 +#else mov r7, #0x80000000 +#endif ror r7, r7, #24 ror r7, r7, r10 asr r7, r7, #31 - ldrd r8, r9, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #24] + ldr r9, [r1, #28] +#else + ldrd r8, r9, [r1, #24] +#endif eor r8, r8, r3 eor r9, r9, r12 and r8, r8, r7 and r9, r9, r7 eor r3, r3, r8 eor r12, r12, r9 - ldrd r8, r9, [r1, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #56] + ldr r9, [r1, #60] +#else + ldrd r8, r9, [r1, #56] +#endif eor r8, r8, lr eor r9, r9, r4 and r8, r8, r7 and r9, r9, r7 eor lr, lr, r8 eor r4, r4, r9 - ldrd r8, r9, [r1, #88] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #88] + ldr r9, [r1, #92] +#else + ldrd r8, r9, [r1, #88] +#endif eor r8, r8, r5 eor r9, r9, r6 and r8, r8, r7 @@ -1303,7 +2386,13 @@ fe_cmov_table: eor r6, r6, r9 sub r1, r1, #0x2a0 mov r8, #-1 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + mov r9, #0x7fffff + lsl r9, r9, #8 + add r9, r9, #0xff +#else mov r9, #0x7fffffff +#endif rsbs r11, r11, #0 sbcs r8, r8, r5 sbc r9, r9, r6 @@ -1322,12 +2411,24 @@ fe_cmov_table: eor r9, r9, r6 and r9, r9, r10 eor r6, r6, r9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #24] str r12, [r0, #28] +#else + strd r3, r12, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str lr, [r0, #56] str r4, [r0, #60] +#else + strd lr, r4, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r5, [r0, #88] str r6, [r0, #92] +#else + strd r5, r6, [r0, #88] +#endif pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size fe_cmov_table,.-fe_cmov_table .text @@ -1752,10 +2853,30 @@ fe_mul: str r4, [sp, #60] # Reduce # Load bottom half - ldrd r4, r5, [sp] - ldrd r6, r7, [sp, #8] - ldrd r8, r9, [sp, #16] - ldrd r10, r11, [sp, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp] + ldr r5, [sp, #4] +#else + ldrd r4, r5, [sp] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #8] + ldr r7, [sp, #12] +#else + ldrd r6, r7, [sp, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [sp, #16] + ldr r9, [sp, #20] +#else + ldrd r8, r9, [sp, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [sp, #24] + ldr r11, [sp, #28] +#else + ldrd r10, r11, [sp, #24] +#endif lsr r3, r11, #31 and r11, r11, #0x7fffffff mov lr, #19 @@ -1852,10 +2973,30 @@ fe_mul: adcs r10, r10, #0 adc r11, r11, #0 # Store - strd r4, r5, [r0] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif add sp, sp, #0x40 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size fe_mul,.-fe_mul @@ -2173,10 +3314,30 @@ fe_sq: str r4, [sp, #60] # Reduce # Load bottom half - ldrd r4, r5, [sp] - ldrd r6, r7, [sp, #8] - ldrd r8, r9, [sp, #16] - ldrd r10, r11, [sp, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp] + ldr r5, [sp, #4] +#else + ldrd r4, r5, [sp] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #8] + ldr r7, [sp, #12] +#else + ldrd r6, r7, [sp, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [sp, #16] + ldr r9, [sp, #20] +#else + ldrd r8, r9, [sp, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [sp, #24] + ldr r11, [sp, #28] +#else + ldrd r10, r11, [sp, #24] +#endif lsr r2, r11, #31 and r11, r11, #0x7fffffff mov r12, #19 @@ -2273,10 +3434,30 @@ fe_sq: adcs r10, r10, #0 adc r11, r11, #0 # Store - strd r4, r5, [r0] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif add sp, sp, #0x40 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size fe_sq,.-fe_sq @@ -2287,10 +3468,30 @@ fe_sq: fe_mul121666: push {r4, r5, r6, r7, r8, r9, r10, lr} # Multiply by 121666 - ldrd r2, r3, [r1] - ldrd r4, r5, [r1, #8] - ldrd r6, r7, [r1, #16] - ldrd r8, r9, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r2, [r1] + ldr r3, [r1, #4] +#else + ldrd r2, r3, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r1, #8] + ldr r5, [r1, #12] +#else + ldrd r4, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #16] + ldr r7, [r1, #20] +#else + ldrd r6, r7, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #24] + ldr r9, [r1, #28] +#else + ldrd r8, r9, [r1, #24] +#endif movw lr, #0xdb42 movt lr, #1 umull r2, r10, r2, lr @@ -2328,10 +3529,30 @@ fe_mul121666: adcs r7, r7, #0 adcs r8, r8, #0 adc r9, r9, #0 - strd r2, r3, [r0] - strd r4, r5, [r0, #8] - strd r6, r7, [r0, #16] - strd r8, r9, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r2, [r0] + str r3, [r0, #4] +#else + strd r2, r3, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #16] + str r7, [r0, #20] +#else + strd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif pop {r4, r5, r6, r7, r8, r9, r10, pc} .size fe_mul121666,.-fe_mul121666 .text @@ -2648,10 +3869,30 @@ fe_sq2: str r4, [sp, #60] # Double and Reduce # Load bottom half - ldrd r4, r5, [sp] - ldrd r6, r7, [sp, #8] - ldrd r8, r9, [sp, #16] - ldrd r10, r11, [sp, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp] + ldr r5, [sp, #4] +#else + ldrd r4, r5, [sp] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #8] + ldr r7, [sp, #12] +#else + ldrd r6, r7, [sp, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [sp, #16] + ldr r9, [sp, #20] +#else + ldrd r8, r9, [sp, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [sp, #24] + ldr r11, [sp, #28] +#else + ldrd r10, r11, [sp, #24] +#endif lsr r2, r11, #30 lsl r11, r11, #1 orr r11, r11, r10, lsr #31 @@ -2763,10 +4004,30 @@ fe_sq2: adcs r10, r10, #0 adc r11, r11, #0 # Store - strd r4, r5, [r0] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif add sp, sp, #0x40 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size fe_sq2,.-fe_sq2 @@ -2935,44 +4196,132 @@ curve25519: # Set one mov r11, #1 mov r10, #0 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r11, [r0] str r10, [r0, #4] +#else + strd r11, r10, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r10, [r0, #8] str r10, [r0, #12] +#else + strd r10, r10, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r10, [r0, #16] str r10, [r0, #20] +#else + strd r10, r10, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r10, [r0, #24] str r10, [r0, #28] +#else + strd r10, r10, [r0, #24] +#endif # Set zero mov r10, #0 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r10, [sp] str r10, [sp, #4] +#else + strd r10, r10, [sp] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r10, [sp, #8] str r10, [sp, #12] +#else + strd r10, r10, [sp, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r10, [sp, #16] str r10, [sp, #20] +#else + strd r10, r10, [sp, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r10, [sp, #24] str r10, [sp, #28] +#else + strd r10, r10, [sp, #24] +#endif # Set one mov r11, #1 mov r10, #0 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r11, [sp, #32] str r10, [sp, #36] +#else + strd r11, r10, [sp, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r10, [sp, #40] str r10, [sp, #44] +#else + strd r10, r10, [sp, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r10, [sp, #48] str r10, [sp, #52] +#else + strd r10, r10, [sp, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r10, [sp, #56] str r10, [sp, #60] +#else + strd r10, r10, [sp, #56] +#endif # Copy - ldrd r4, r5, [r2] - ldrd r6, r7, [r2, #8] - strd r4, r5, [sp, #64] - strd r6, r7, [sp, #72] - ldrd r4, r5, [r2, #16] - ldrd r6, r7, [r2, #24] - strd r4, r5, [sp, #80] - strd r6, r7, [sp, #88] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r2] + ldr r5, [r2, #4] +#else + ldrd r4, r5, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r2, #8] + ldr r7, [r2, #12] +#else + ldrd r6, r7, [r2, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #64] + str r5, [sp, #68] +#else + strd r4, r5, [sp, #64] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #72] + str r7, [sp, #76] +#else + strd r6, r7, [sp, #72] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r2, #16] + ldr r5, [r2, #20] +#else + ldrd r4, r5, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r2, #24] + ldr r7, [r2, #28] +#else + ldrd r6, r7, [r2, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #80] + str r5, [sp, #84] +#else + strd r4, r5, [sp, #80] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #88] + str r7, [sp, #92] +#else + strd r6, r7, [sp, #88] +#endif mov r1, #30 str r1, [sp, #180] mov r2, #28 @@ -2991,8 +4340,18 @@ L_curve25519_bits: ldr r0, [sp, #160] # Conditional Swap neg r1, r1 - ldrd r4, r5, [r0] - ldrd r6, r7, [sp, #64] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #64] + ldr r7, [sp, #68] +#else + ldrd r6, r7, [sp, #64] +#endif eor r8, r4, r6 eor r9, r5, r7 and r8, r8, r1 @@ -3001,10 +4360,30 @@ L_curve25519_bits: eor r5, r5, r9 eor r6, r6, r8 eor r7, r7, r9 - strd r4, r5, [r0] - strd r6, r7, [sp, #64] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [sp, #72] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #64] + str r7, [sp, #68] +#else + strd r6, r7, [sp, #64] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #72] + ldr r7, [sp, #76] +#else + ldrd r6, r7, [sp, #72] +#endif eor r8, r4, r6 eor r9, r5, r7 and r8, r8, r1 @@ -3013,10 +4392,30 @@ L_curve25519_bits: eor r5, r5, r9 eor r6, r6, r8 eor r7, r7, r9 - strd r4, r5, [r0, #8] - strd r6, r7, [sp, #72] - ldrd r4, r5, [r0, #16] - ldrd r6, r7, [sp, #80] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #72] + str r7, [sp, #76] +#else + strd r6, r7, [sp, #72] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #80] + ldr r7, [sp, #84] +#else + ldrd r6, r7, [sp, #80] +#endif eor r8, r4, r6 eor r9, r5, r7 and r8, r8, r1 @@ -3025,10 +4424,30 @@ L_curve25519_bits: eor r5, r5, r9 eor r6, r6, r8 eor r7, r7, r9 - strd r4, r5, [r0, #16] - strd r6, r7, [sp, #80] - ldrd r4, r5, [r0, #24] - ldrd r6, r7, [sp, #88] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #80] + str r7, [sp, #84] +#else + strd r6, r7, [sp, #80] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #88] + ldr r7, [sp, #92] +#else + ldrd r6, r7, [sp, #88] +#endif eor r8, r4, r6 eor r9, r5, r7 and r8, r8, r1 @@ -3037,13 +4456,33 @@ L_curve25519_bits: eor r5, r5, r9 eor r6, r6, r8 eor r7, r7, r9 - strd r4, r5, [r0, #24] - strd r6, r7, [sp, #88] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #88] + str r7, [sp, #92] +#else + strd r6, r7, [sp, #88] +#endif ldr r1, [sp, #172] # Conditional Swap neg r1, r1 - ldrd r4, r5, [sp] - ldrd r6, r7, [sp, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp] + ldr r5, [sp, #4] +#else + ldrd r4, r5, [sp] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #32] + ldr r7, [sp, #36] +#else + ldrd r6, r7, [sp, #32] +#endif eor r8, r4, r6 eor r9, r5, r7 and r8, r8, r1 @@ -3052,10 +4491,30 @@ L_curve25519_bits: eor r5, r5, r9 eor r6, r6, r8 eor r7, r7, r9 - strd r4, r5, [sp] - strd r6, r7, [sp, #32] - ldrd r4, r5, [sp, #8] - ldrd r6, r7, [sp, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp] + str r5, [sp, #4] +#else + strd r4, r5, [sp] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #32] + str r7, [sp, #36] +#else + strd r6, r7, [sp, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #8] + ldr r5, [sp, #12] +#else + ldrd r4, r5, [sp, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #40] + ldr r7, [sp, #44] +#else + ldrd r6, r7, [sp, #40] +#endif eor r8, r4, r6 eor r9, r5, r7 and r8, r8, r1 @@ -3064,10 +4523,30 @@ L_curve25519_bits: eor r5, r5, r9 eor r6, r6, r8 eor r7, r7, r9 - strd r4, r5, [sp, #8] - strd r6, r7, [sp, #40] - ldrd r4, r5, [sp, #16] - ldrd r6, r7, [sp, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #8] + str r5, [sp, #12] +#else + strd r4, r5, [sp, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #40] + str r7, [sp, #44] +#else + strd r6, r7, [sp, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #16] + ldr r5, [sp, #20] +#else + ldrd r4, r5, [sp, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #48] + ldr r7, [sp, #52] +#else + ldrd r6, r7, [sp, #48] +#endif eor r8, r4, r6 eor r9, r5, r7 and r8, r8, r1 @@ -3076,10 +4555,30 @@ L_curve25519_bits: eor r5, r5, r9 eor r6, r6, r8 eor r7, r7, r9 - strd r4, r5, [sp, #16] - strd r6, r7, [sp, #48] - ldrd r4, r5, [sp, #24] - ldrd r6, r7, [sp, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #16] + str r5, [sp, #20] +#else + strd r4, r5, [sp, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #48] + str r7, [sp, #52] +#else + strd r6, r7, [sp, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #24] + ldr r5, [sp, #28] +#else + ldrd r4, r5, [sp, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #56] + ldr r7, [sp, #60] +#else + ldrd r6, r7, [sp, #56] +#endif eor r8, r4, r6 eor r9, r5, r7 and r8, r8, r1 @@ -3088,60 +4587,140 @@ L_curve25519_bits: eor r5, r5, r9 eor r6, r6, r8 eor r7, r7, r9 - strd r4, r5, [sp, #24] - strd r6, r7, [sp, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #24] + str r5, [sp, #28] +#else + strd r4, r5, [sp, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #56] + str r7, [sp, #60] +#else + strd r6, r7, [sp, #56] +#endif ldr r1, [sp, #184] str r1, [sp, #172] # Add-Sub # Add - ldrd r4, r5, [r0] - ldrd r6, r7, [sp] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp] + ldr r7, [sp, #4] +#else + ldrd r6, r7, [sp] +#endif adds r8, r4, r6 mov r3, #0 adcs r9, r5, r7 adc r3, r3, #0 - strd r8, r9, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif # Sub subs r10, r4, r6 mov r12, #0 sbcs r11, r5, r7 adc r12, r12, #0 - strd r10, r11, [sp, #128] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #128] + str r11, [sp, #132] +#else + strd r10, r11, [sp, #128] +#endif # Add - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [sp, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #8] + ldr r7, [sp, #12] +#else + ldrd r6, r7, [sp, #8] +#endif adds r3, r3, #-1 adcs r8, r4, r6 mov r3, #0 adcs r9, r5, r7 adc r3, r3, #0 - strd r8, r9, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif # Sub adds r12, r12, #-1 sbcs r10, r4, r6 mov r12, #0 sbcs r11, r5, r7 adc r12, r12, #0 - strd r10, r11, [sp, #136] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #136] + str r11, [sp, #140] +#else + strd r10, r11, [sp, #136] +#endif # Add - ldrd r4, r5, [r0, #16] - ldrd r6, r7, [sp, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #16] + ldr r7, [sp, #20] +#else + ldrd r6, r7, [sp, #16] +#endif adds r3, r3, #-1 adcs r8, r4, r6 mov r3, #0 adcs r9, r5, r7 adc r3, r3, #0 - strd r8, r9, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif # Sub adds r12, r12, #-1 sbcs r10, r4, r6 mov r12, #0 sbcs r11, r5, r7 adc r12, r12, #0 - strd r10, r11, [sp, #144] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #144] + str r11, [sp, #148] +#else + strd r10, r11, [sp, #144] +#endif # Add - ldrd r4, r5, [r0, #24] - ldrd r6, r7, [sp, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #24] + ldr r7, [sp, #28] +#else + ldrd r6, r7, [sp, #24] +#endif adds r3, r3, #-1 adcs r8, r4, r6 adc r9, r5, r7 @@ -3155,92 +4734,232 @@ L_curve25519_bits: and r3, r2, r3 and r12, r2, #0x7fffffff # Sub modulus (if overflow) - ldrd r4, r5, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif subs r4, r4, r3 sbcs r5, r5, r2 - strd r4, r5, [r0] - ldrd r4, r5, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif sbcs r4, r4, r2 sbcs r5, r5, r2 - strd r4, r5, [r0, #8] - ldrd r4, r5, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif sbcs r4, r4, r2 sbcs r5, r5, r2 - strd r4, r5, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif sbcs r8, r8, r2 sbc r9, r9, r12 - strd r8, r9, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif mov r3, #-19 asr r2, r11, #31 # Mask the modulus and r3, r2, r3 and r12, r2, #0x7fffffff # Add modulus (if underflow) - ldrd r4, r5, [sp, #128] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #128] + ldr r5, [sp, #132] +#else + ldrd r4, r5, [sp, #128] +#endif adds r4, r4, r3 adcs r5, r5, r2 - strd r4, r5, [sp, #128] - ldrd r4, r5, [sp, #136] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #128] + str r5, [sp, #132] +#else + strd r4, r5, [sp, #128] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #136] + ldr r5, [sp, #140] +#else + ldrd r4, r5, [sp, #136] +#endif adcs r4, r4, r2 adcs r5, r5, r2 - strd r4, r5, [sp, #136] - ldrd r4, r5, [sp, #144] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #136] + str r5, [sp, #140] +#else + strd r4, r5, [sp, #136] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #144] + ldr r5, [sp, #148] +#else + ldrd r4, r5, [sp, #144] +#endif adcs r4, r4, r2 adcs r5, r5, r2 - strd r4, r5, [sp, #144] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #144] + str r5, [sp, #148] +#else + strd r4, r5, [sp, #144] +#endif adcs r10, r10, r2 adc r11, r11, r12 - strd r10, r11, [sp, #152] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #152] + str r11, [sp, #156] +#else + strd r10, r11, [sp, #152] +#endif # Add-Sub # Add - ldrd r4, r5, [sp, #64] - ldrd r6, r7, [sp, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #64] + ldr r5, [sp, #68] +#else + ldrd r4, r5, [sp, #64] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #32] + ldr r7, [sp, #36] +#else + ldrd r6, r7, [sp, #32] +#endif adds r8, r4, r6 mov r3, #0 adcs r9, r5, r7 adc r3, r3, #0 - strd r8, r9, [sp] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp] + str r9, [sp, #4] +#else + strd r8, r9, [sp] +#endif # Sub subs r10, r4, r6 mov r12, #0 sbcs r11, r5, r7 adc r12, r12, #0 - strd r10, r11, [sp, #96] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #96] + str r11, [sp, #100] +#else + strd r10, r11, [sp, #96] +#endif # Add - ldrd r4, r5, [sp, #72] - ldrd r6, r7, [sp, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #72] + ldr r5, [sp, #76] +#else + ldrd r4, r5, [sp, #72] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #40] + ldr r7, [sp, #44] +#else + ldrd r6, r7, [sp, #40] +#endif adds r3, r3, #-1 adcs r8, r4, r6 mov r3, #0 adcs r9, r5, r7 adc r3, r3, #0 - strd r8, r9, [sp, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #8] + str r9, [sp, #12] +#else + strd r8, r9, [sp, #8] +#endif # Sub adds r12, r12, #-1 sbcs r10, r4, r6 mov r12, #0 sbcs r11, r5, r7 adc r12, r12, #0 - strd r10, r11, [sp, #104] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #104] + str r11, [sp, #108] +#else + strd r10, r11, [sp, #104] +#endif # Add - ldrd r4, r5, [sp, #80] - ldrd r6, r7, [sp, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #80] + ldr r5, [sp, #84] +#else + ldrd r4, r5, [sp, #80] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #48] + ldr r7, [sp, #52] +#else + ldrd r6, r7, [sp, #48] +#endif adds r3, r3, #-1 adcs r8, r4, r6 mov r3, #0 adcs r9, r5, r7 adc r3, r3, #0 - strd r8, r9, [sp, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #16] + str r9, [sp, #20] +#else + strd r8, r9, [sp, #16] +#endif # Sub adds r12, r12, #-1 sbcs r10, r4, r6 mov r12, #0 sbcs r11, r5, r7 adc r12, r12, #0 - strd r10, r11, [sp, #112] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #112] + str r11, [sp, #116] +#else + strd r10, r11, [sp, #112] +#endif # Add - ldrd r4, r5, [sp, #88] - ldrd r6, r7, [sp, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #88] + ldr r5, [sp, #92] +#else + ldrd r4, r5, [sp, #88] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #56] + ldr r7, [sp, #60] +#else + ldrd r6, r7, [sp, #56] +#endif adds r3, r3, #-1 adcs r8, r4, r6 adc r9, r5, r7 @@ -3254,42 +4973,112 @@ L_curve25519_bits: and r3, r2, r3 and r12, r2, #0x7fffffff # Sub modulus (if overflow) - ldrd r4, r5, [sp] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp] + ldr r5, [sp, #4] +#else + ldrd r4, r5, [sp] +#endif subs r4, r4, r3 sbcs r5, r5, r2 - strd r4, r5, [sp] - ldrd r4, r5, [sp, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp] + str r5, [sp, #4] +#else + strd r4, r5, [sp] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #8] + ldr r5, [sp, #12] +#else + ldrd r4, r5, [sp, #8] +#endif sbcs r4, r4, r2 sbcs r5, r5, r2 - strd r4, r5, [sp, #8] - ldrd r4, r5, [sp, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #8] + str r5, [sp, #12] +#else + strd r4, r5, [sp, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #16] + ldr r5, [sp, #20] +#else + ldrd r4, r5, [sp, #16] +#endif sbcs r4, r4, r2 sbcs r5, r5, r2 - strd r4, r5, [sp, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #16] + str r5, [sp, #20] +#else + strd r4, r5, [sp, #16] +#endif sbcs r8, r8, r2 sbc r9, r9, r12 - strd r8, r9, [sp, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #24] + str r9, [sp, #28] +#else + strd r8, r9, [sp, #24] +#endif mov r3, #-19 asr r2, r11, #31 # Mask the modulus and r3, r2, r3 and r12, r2, #0x7fffffff # Add modulus (if underflow) - ldrd r4, r5, [sp, #96] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #96] + ldr r5, [sp, #100] +#else + ldrd r4, r5, [sp, #96] +#endif adds r4, r4, r3 adcs r5, r5, r2 - strd r4, r5, [sp, #96] - ldrd r4, r5, [sp, #104] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #96] + str r5, [sp, #100] +#else + strd r4, r5, [sp, #96] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #104] + ldr r5, [sp, #108] +#else + ldrd r4, r5, [sp, #104] +#endif adcs r4, r4, r2 adcs r5, r5, r2 - strd r4, r5, [sp, #104] - ldrd r4, r5, [sp, #112] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #104] + str r5, [sp, #108] +#else + strd r4, r5, [sp, #104] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #112] + ldr r5, [sp, #116] +#else + ldrd r4, r5, [sp, #112] +#endif adcs r4, r4, r2 adcs r5, r5, r2 - strd r4, r5, [sp, #112] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #112] + str r5, [sp, #116] +#else + strd r4, r5, [sp, #112] +#endif adcs r10, r10, r2 adc r11, r11, r12 - strd r10, r11, [sp, #120] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #120] + str r11, [sp, #124] +#else + strd r10, r11, [sp, #120] +#endif ldr r2, [sp, #160] add r1, sp, #0x60 add r0, sp, #32 @@ -3306,54 +5095,124 @@ L_curve25519_bits: bl fe_sq # Add-Sub # Add - ldrd r4, r5, [sp, #32] - ldrd r6, r7, [sp] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #32] + ldr r5, [sp, #36] +#else + ldrd r4, r5, [sp, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp] + ldr r7, [sp, #4] +#else + ldrd r6, r7, [sp] +#endif adds r8, r4, r6 mov r3, #0 adcs r9, r5, r7 adc r3, r3, #0 - strd r8, r9, [sp, #64] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #64] + str r9, [sp, #68] +#else + strd r8, r9, [sp, #64] +#endif # Sub subs r10, r4, r6 mov r12, #0 sbcs r11, r5, r7 adc r12, r12, #0 - strd r10, r11, [sp] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp] + str r11, [sp, #4] +#else + strd r10, r11, [sp] +#endif # Add - ldrd r4, r5, [sp, #40] - ldrd r6, r7, [sp, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #40] + ldr r5, [sp, #44] +#else + ldrd r4, r5, [sp, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #8] + ldr r7, [sp, #12] +#else + ldrd r6, r7, [sp, #8] +#endif adds r3, r3, #-1 adcs r8, r4, r6 mov r3, #0 adcs r9, r5, r7 adc r3, r3, #0 - strd r8, r9, [sp, #72] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #72] + str r9, [sp, #76] +#else + strd r8, r9, [sp, #72] +#endif # Sub adds r12, r12, #-1 sbcs r10, r4, r6 mov r12, #0 sbcs r11, r5, r7 adc r12, r12, #0 - strd r10, r11, [sp, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #8] + str r11, [sp, #12] +#else + strd r10, r11, [sp, #8] +#endif # Add - ldrd r4, r5, [sp, #48] - ldrd r6, r7, [sp, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #48] + ldr r5, [sp, #52] +#else + ldrd r4, r5, [sp, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #16] + ldr r7, [sp, #20] +#else + ldrd r6, r7, [sp, #16] +#endif adds r3, r3, #-1 adcs r8, r4, r6 mov r3, #0 adcs r9, r5, r7 adc r3, r3, #0 - strd r8, r9, [sp, #80] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #80] + str r9, [sp, #84] +#else + strd r8, r9, [sp, #80] +#endif # Sub adds r12, r12, #-1 sbcs r10, r4, r6 mov r12, #0 sbcs r11, r5, r7 adc r12, r12, #0 - strd r10, r11, [sp, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #16] + str r11, [sp, #20] +#else + strd r10, r11, [sp, #16] +#endif # Add - ldrd r4, r5, [sp, #56] - ldrd r6, r7, [sp, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #56] + ldr r5, [sp, #60] +#else + ldrd r4, r5, [sp, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #24] + ldr r7, [sp, #28] +#else + ldrd r6, r7, [sp, #24] +#endif adds r3, r3, #-1 adcs r8, r4, r6 adc r9, r5, r7 @@ -3367,61 +5226,181 @@ L_curve25519_bits: and r3, r2, r3 and r12, r2, #0x7fffffff # Sub modulus (if overflow) - ldrd r4, r5, [sp, #64] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #64] + ldr r5, [sp, #68] +#else + ldrd r4, r5, [sp, #64] +#endif subs r4, r4, r3 sbcs r5, r5, r2 - strd r4, r5, [sp, #64] - ldrd r4, r5, [sp, #72] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #64] + str r5, [sp, #68] +#else + strd r4, r5, [sp, #64] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #72] + ldr r5, [sp, #76] +#else + ldrd r4, r5, [sp, #72] +#endif sbcs r4, r4, r2 sbcs r5, r5, r2 - strd r4, r5, [sp, #72] - ldrd r4, r5, [sp, #80] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #72] + str r5, [sp, #76] +#else + strd r4, r5, [sp, #72] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #80] + ldr r5, [sp, #84] +#else + ldrd r4, r5, [sp, #80] +#endif sbcs r4, r4, r2 sbcs r5, r5, r2 - strd r4, r5, [sp, #80] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #80] + str r5, [sp, #84] +#else + strd r4, r5, [sp, #80] +#endif sbcs r8, r8, r2 sbc r9, r9, r12 - strd r8, r9, [sp, #88] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #88] + str r9, [sp, #92] +#else + strd r8, r9, [sp, #88] +#endif mov r3, #-19 asr r2, r11, #31 # Mask the modulus and r3, r2, r3 and r12, r2, #0x7fffffff # Add modulus (if underflow) - ldrd r4, r5, [sp] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp] + ldr r5, [sp, #4] +#else + ldrd r4, r5, [sp] +#endif adds r4, r4, r3 adcs r5, r5, r2 - strd r4, r5, [sp] - ldrd r4, r5, [sp, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp] + str r5, [sp, #4] +#else + strd r4, r5, [sp] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #8] + ldr r5, [sp, #12] +#else + ldrd r4, r5, [sp, #8] +#endif adcs r4, r4, r2 adcs r5, r5, r2 - strd r4, r5, [sp, #8] - ldrd r4, r5, [sp, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #8] + str r5, [sp, #12] +#else + strd r4, r5, [sp, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #16] + ldr r5, [sp, #20] +#else + ldrd r4, r5, [sp, #16] +#endif adcs r4, r4, r2 adcs r5, r5, r2 - strd r4, r5, [sp, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #16] + str r5, [sp, #20] +#else + strd r4, r5, [sp, #16] +#endif adcs r10, r10, r2 adc r11, r11, r12 - strd r10, r11, [sp, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #24] + str r11, [sp, #28] +#else + strd r10, r11, [sp, #24] +#endif add r2, sp, #0x60 add r1, sp, #0x80 ldr r0, [sp, #160] bl fe_mul # Sub - ldrd r4, r5, [sp, #128] - ldrd r6, r7, [sp, #136] - ldrd r8, r9, [sp, #96] - ldrd r10, r11, [sp, #104] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #128] + ldr r5, [sp, #132] +#else + ldrd r4, r5, [sp, #128] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #136] + ldr r7, [sp, #140] +#else + ldrd r6, r7, [sp, #136] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [sp, #96] + ldr r9, [sp, #100] +#else + ldrd r8, r9, [sp, #96] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [sp, #104] + ldr r11, [sp, #108] +#else + ldrd r10, r11, [sp, #104] +#endif subs r8, r4, r8 sbcs r9, r5, r9 sbcs r10, r6, r10 sbcs r11, r7, r11 - strd r8, r9, [sp, #128] - strd r10, r11, [sp, #136] - ldrd r4, r5, [sp, #144] - ldrd r6, r7, [sp, #152] - ldrd r8, r9, [sp, #112] - ldrd r10, r11, [sp, #120] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #128] + str r9, [sp, #132] +#else + strd r8, r9, [sp, #128] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #136] + str r11, [sp, #140] +#else + strd r10, r11, [sp, #136] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #144] + ldr r5, [sp, #148] +#else + ldrd r4, r5, [sp, #144] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #152] + ldr r7, [sp, #156] +#else + ldrd r6, r7, [sp, #152] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [sp, #112] + ldr r9, [sp, #116] +#else + ldrd r8, r9, [sp, #112] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [sp, #120] + ldr r11, [sp, #124] +#else + ldrd r10, r11, [sp, #120] +#endif sbcs r8, r4, r8 sbcs r9, r5, r9 sbcs r10, r6, r10 @@ -3432,8 +5411,18 @@ L_curve25519_bits: and r3, r2, r3 and r12, r2, #0x7fffffff # Add modulus (if underflow) - ldrd r4, r5, [sp, #128] - ldrd r6, r7, [sp, #136] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #128] + ldr r5, [sp, #132] +#else + ldrd r4, r5, [sp, #128] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #136] + ldr r7, [sp, #140] +#else + ldrd r6, r7, [sp, #136] +#endif adds r4, r4, r3 adcs r5, r5, r2 adcs r6, r6, r2 @@ -3442,18 +5431,58 @@ L_curve25519_bits: adcs r9, r9, r2 adcs r10, r10, r2 adc r11, r11, r12 - strd r4, r5, [sp, #128] - strd r6, r7, [sp, #136] - strd r8, r9, [sp, #144] - strd r10, r11, [sp, #152] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #128] + str r5, [sp, #132] +#else + strd r4, r5, [sp, #128] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #136] + str r7, [sp, #140] +#else + strd r6, r7, [sp, #136] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #144] + str r9, [sp, #148] +#else + strd r8, r9, [sp, #144] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #152] + str r11, [sp, #156] +#else + strd r10, r11, [sp, #152] +#endif add r1, sp, #0 add r0, sp, #0 bl fe_sq # Multiply by 121666 - ldrd r4, r5, [sp, #128] - ldrd r6, r7, [sp, #136] - ldrd r8, r9, [sp, #144] - ldrd r10, r11, [sp, #152] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #128] + ldr r5, [sp, #132] +#else + ldrd r4, r5, [sp, #128] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #136] + ldr r7, [sp, #140] +#else + ldrd r6, r7, [sp, #136] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [sp, #144] + ldr r9, [sp, #148] +#else + ldrd r8, r9, [sp, #144] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [sp, #152] + ldr r11, [sp, #156] +#else + ldrd r10, r11, [sp, #152] +#endif movw r12, #0xdb42 movt r12, #1 umull r4, r2, r4, r12 @@ -3491,28 +5520,98 @@ L_curve25519_bits: adcs r9, r9, #0 adcs r10, r10, #0 adc r11, r11, #0 - strd r4, r5, [sp, #32] - strd r6, r7, [sp, #40] - strd r8, r9, [sp, #48] - strd r10, r11, [sp, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #32] + str r5, [sp, #36] +#else + strd r4, r5, [sp, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #40] + str r7, [sp, #44] +#else + strd r6, r7, [sp, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #48] + str r9, [sp, #52] +#else + strd r8, r9, [sp, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #56] + str r11, [sp, #60] +#else + strd r10, r11, [sp, #56] +#endif add r1, sp, #0x40 add r0, sp, #0x40 bl fe_sq # Add - ldrd r4, r5, [sp, #96] - ldrd r6, r7, [sp, #104] - ldrd r8, r9, [sp, #32] - ldrd r10, r11, [sp, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #96] + ldr r5, [sp, #100] +#else + ldrd r4, r5, [sp, #96] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #104] + ldr r7, [sp, #108] +#else + ldrd r6, r7, [sp, #104] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [sp, #32] + ldr r9, [sp, #36] +#else + ldrd r8, r9, [sp, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [sp, #40] + ldr r11, [sp, #44] +#else + ldrd r10, r11, [sp, #40] +#endif adds r8, r4, r8 adcs r9, r5, r9 adcs r10, r6, r10 adcs r11, r7, r11 - strd r8, r9, [sp, #96] - strd r10, r11, [sp, #104] - ldrd r4, r5, [sp, #112] - ldrd r6, r7, [sp, #120] - ldrd r8, r9, [sp, #48] - ldrd r10, r11, [sp, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #96] + str r9, [sp, #100] +#else + strd r8, r9, [sp, #96] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #104] + str r11, [sp, #108] +#else + strd r10, r11, [sp, #104] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #112] + ldr r5, [sp, #116] +#else + ldrd r4, r5, [sp, #112] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #120] + ldr r7, [sp, #124] +#else + ldrd r6, r7, [sp, #120] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [sp, #48] + ldr r9, [sp, #52] +#else + ldrd r8, r9, [sp, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [sp, #56] + ldr r11, [sp, #60] +#else + ldrd r10, r11, [sp, #56] +#endif adcs r8, r4, r8 adcs r9, r5, r9 adcs r10, r6, r10 @@ -3523,8 +5622,18 @@ L_curve25519_bits: and r3, r2, r3 and r12, r2, #0x7fffffff # Sub modulus (if overflow) - ldrd r4, r5, [sp, #96] - ldrd r6, r7, [sp, #104] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [sp, #96] + ldr r5, [sp, #100] +#else + ldrd r4, r5, [sp, #96] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #104] + ldr r7, [sp, #108] +#else + ldrd r6, r7, [sp, #104] +#endif subs r4, r4, r3 sbcs r5, r5, r2 sbcs r6, r6, r2 @@ -3533,10 +5642,30 @@ L_curve25519_bits: sbcs r9, r9, r2 sbcs r10, r10, r2 sbc r11, r11, r12 - strd r4, r5, [sp, #96] - strd r6, r7, [sp, #104] - strd r8, r9, [sp, #112] - strd r10, r11, [sp, #120] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #96] + str r5, [sp, #100] +#else + strd r4, r5, [sp, #96] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #104] + str r7, [sp, #108] +#else + strd r6, r7, [sp, #104] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #112] + str r9, [sp, #116] +#else + strd r8, r9, [sp, #112] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [sp, #120] + str r11, [sp, #124] +#else + strd r10, r11, [sp, #120] +#endif add r2, sp, #0 ldr r1, [sp, #168] add r0, sp, #32 @@ -3926,22 +6055,70 @@ fe_ge_dbl: ldr r1, [sp, #52] ldr r2, [sp, #56] # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r1, #8] - ldrd r8, r9, [r2] - ldrd r10, r11, [r2, #8] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2] + ldr r9, [r2, #4] +#else + ldrd r8, r9, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #8] + ldr r11, [r2, #12] +#else + ldrd r10, r11, [r2, #8] +#endif adds r8, r3, r8 adcs r9, r5, r9 adcs r10, r6, r10 adcs r11, r7, r11 - strd r8, r9, [r0] - strd r10, r11, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #8] + str r11, [r0, #12] +#else + strd r10, r11, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] - ldrd r6, r7, [r1, #24] - ldrd r8, r9, [r2, #16] - ldrd r10, r11, [r2, #24] +#else + ldrd r3, r5, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #24] + ldr r7, [r1, #28] +#else + ldrd r6, r7, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2, #16] + ldr r9, [r2, #20] +#else + ldrd r8, r9, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #24] + ldr r11, [r2, #28] +#else + ldrd r10, r11, [r2, #24] +#endif adcs r8, r3, r8 adcs r9, r5, r9 adcs r10, r6, r10 @@ -3952,9 +6129,18 @@ fe_ge_dbl: and r12, r4, r12 and lr, r4, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif subs r3, r3, r12 sbcs r5, r5, r4 sbcs r6, r6, r4 @@ -3963,11 +6149,30 @@ fe_ge_dbl: sbcs r9, r9, r4 sbcs r10, r10, r4 sbc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif ldr r1, [sp, #4] ldr r0, [sp, #12] bl fe_sq @@ -3976,58 +6181,124 @@ fe_ge_dbl: ldr r2, [sp] # Add-Sub # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r2] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r2] + ldr r7, [r2, #4] +#else + ldrd r6, r7, [r2] +#endif adds r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif # Sub subs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1] + str r11, [r1, #4] +#else + strd r10, r11, [r1] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #8] ldr r5, [r1, #12] - ldrd r6, r7, [r2, #8] +#else + ldrd r3, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r2, #8] + ldr r7, [r2, #12] +#else + ldrd r6, r7, [r2, #8] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #8] + str r11, [r1, #12] +#else + strd r10, r11, [r1, #8] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] - ldrd r6, r7, [r2, #16] +#else + ldrd r3, r5, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r2, #16] + ldr r7, [r2, #20] +#else + ldrd r6, r7, [r2, #16] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #16] + str r11, [r1, #20] +#else + strd r10, r11, [r1, #16] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #24] ldr r5, [r1, #28] - ldrd r6, r7, [r2, #24] +#else + ldrd r3, r5, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r2, #24] + ldr r7, [r2, #28] +#else + ldrd r6, r7, [r2, #24] +#endif adds r12, r12, #-1 adcs r8, r3, r6 adc r9, r5, r7 @@ -4041,74 +6312,180 @@ fe_ge_dbl: and r12, r4, r12 and lr, r4, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] +#else + ldrd r3, r5, [r0] +#endif subs r3, r3, r12 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #8] ldr r5, [r0, #12] +#else + ldrd r3, r5, [r0, #8] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #8] str r5, [r0, #12] +#else + strd r3, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #16] ldr r5, [r0, #20] +#else + ldrd r3, r5, [r0, #16] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #16] str r5, [r0, #20] +#else + strd r3, r5, [r0, #16] +#endif sbcs r8, r8, r4 sbc r9, r9, lr - strd r8, r9, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif mov r12, #-19 asr r4, r11, #31 # Mask the modulus and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] +#else + ldrd r3, r5, [r1] +#endif adds r3, r3, r12 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1] str r5, [r1, #4] +#else + strd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #8] ldr r5, [r1, #12] +#else + ldrd r3, r5, [r1, #8] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #8] str r5, [r1, #12] +#else + strd r3, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] +#else + ldrd r3, r5, [r1, #16] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #16] str r5, [r1, #20] +#else + strd r3, r5, [r1, #16] +#endif adcs r10, r10, r4 adc r11, r11, lr - strd r10, r11, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #24] + str r11, [r1, #28] +#else + strd r10, r11, [r1, #24] +#endif ldr r0, [sp] ldr r1, [sp, #12] ldr r2, [sp, #4] # Sub +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r1, #8] - ldrd r8, r9, [r2] - ldrd r10, r11, [r2, #8] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2] + ldr r9, [r2, #4] +#else + ldrd r8, r9, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #8] + ldr r11, [r2, #12] +#else + ldrd r10, r11, [r2, #8] +#endif subs r8, r3, r8 sbcs r9, r5, r9 sbcs r10, r6, r10 sbcs r11, r7, r11 - strd r8, r9, [r0] - strd r10, r11, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #8] + str r11, [r0, #12] +#else + strd r10, r11, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] - ldrd r6, r7, [r1, #24] - ldrd r8, r9, [r2, #16] - ldrd r10, r11, [r2, #24] +#else + ldrd r3, r5, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #24] + ldr r7, [r1, #28] +#else + ldrd r6, r7, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2, #16] + ldr r9, [r2, #20] +#else + ldrd r8, r9, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #24] + ldr r11, [r2, #28] +#else + ldrd r10, r11, [r2, #24] +#endif sbcs r8, r3, r8 sbcs r9, r5, r9 sbcs r10, r6, r10 @@ -4119,9 +6496,18 @@ fe_ge_dbl: and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r3, r3, r12 adcs r5, r5, r4 adcs r6, r6, r4 @@ -4130,33 +6516,100 @@ fe_ge_dbl: adcs r9, r9, r4 adcs r10, r10, r4 adc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif ldr r1, [sp, #60] ldr r0, [sp, #12] bl fe_sq2 ldr r0, [sp, #12] ldr r1, [sp, #8] # Sub +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] - ldrd r6, r7, [r0, #8] - ldrd r8, r9, [r1] - ldrd r10, r11, [r1, #8] +#else + ldrd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1] + ldr r9, [r1, #4] +#else + ldrd r8, r9, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r1, #8] + ldr r11, [r1, #12] +#else + ldrd r10, r11, [r1, #8] +#endif subs r8, r3, r8 sbcs r9, r5, r9 sbcs r10, r6, r10 sbcs r11, r7, r11 - strd r8, r9, [r0] - strd r10, r11, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #8] + str r11, [r0, #12] +#else + strd r10, r11, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #16] ldr r5, [r0, #20] - ldrd r6, r7, [r0, #24] - ldrd r8, r9, [r1, #16] - ldrd r10, r11, [r1, #24] +#else + ldrd r3, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #16] + ldr r9, [r1, #20] +#else + ldrd r8, r9, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r1, #24] + ldr r11, [r1, #28] +#else + ldrd r10, r11, [r1, #24] +#endif sbcs r8, r3, r8 sbcs r9, r5, r9 sbcs r10, r6, r10 @@ -4167,9 +6620,18 @@ fe_ge_dbl: and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r3, r3, r12 adcs r5, r5, r4 adcs r6, r6, r4 @@ -4178,11 +6640,30 @@ fe_ge_dbl: adcs r9, r9, r4 adcs r10, r10, r4 adc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif add sp, sp, #16 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size fe_ge_dbl,.-fe_ge_dbl @@ -4201,22 +6682,70 @@ fe_ge_madd: ldr r1, [sp, #72] ldr r2, [sp, #68] # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r1, #8] - ldrd r8, r9, [r2] - ldrd r10, r11, [r2, #8] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2] + ldr r9, [r2, #4] +#else + ldrd r8, r9, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #8] + ldr r11, [r2, #12] +#else + ldrd r10, r11, [r2, #8] +#endif adds r8, r3, r8 adcs r9, r5, r9 adcs r10, r6, r10 adcs r11, r7, r11 - strd r8, r9, [r0] - strd r10, r11, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #8] + str r11, [r0, #12] +#else + strd r10, r11, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] - ldrd r6, r7, [r1, #24] - ldrd r8, r9, [r2, #16] - ldrd r10, r11, [r2, #24] +#else + ldrd r3, r5, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #24] + ldr r7, [r1, #28] +#else + ldrd r6, r7, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2, #16] + ldr r9, [r2, #20] +#else + ldrd r8, r9, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #24] + ldr r11, [r2, #28] +#else + ldrd r10, r11, [r2, #24] +#endif adcs r8, r3, r8 adcs r9, r5, r9 adcs r10, r6, r10 @@ -4227,9 +6756,18 @@ fe_ge_madd: and r12, r4, r12 and lr, r4, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif subs r3, r3, r12 sbcs r5, r5, r4 sbcs r6, r6, r4 @@ -4238,31 +6776,98 @@ fe_ge_madd: sbcs r9, r9, r4 sbcs r10, r10, r4 sbc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif ldr r0, [sp, #4] ldr r1, [sp, #72] ldr r2, [sp, #68] # Sub +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r1, #8] - ldrd r8, r9, [r2] - ldrd r10, r11, [r2, #8] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2] + ldr r9, [r2, #4] +#else + ldrd r8, r9, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #8] + ldr r11, [r2, #12] +#else + ldrd r10, r11, [r2, #8] +#endif subs r8, r3, r8 sbcs r9, r5, r9 sbcs r10, r6, r10 sbcs r11, r7, r11 - strd r8, r9, [r0] - strd r10, r11, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #8] + str r11, [r0, #12] +#else + strd r10, r11, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] - ldrd r6, r7, [r1, #24] - ldrd r8, r9, [r2, #16] - ldrd r10, r11, [r2, #24] +#else + ldrd r3, r5, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #24] + ldr r7, [r1, #28] +#else + ldrd r6, r7, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2, #16] + ldr r9, [r2, #20] +#else + ldrd r8, r9, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #24] + ldr r11, [r2, #28] +#else + ldrd r10, r11, [r2, #24] +#endif sbcs r8, r3, r8 sbcs r9, r5, r9 sbcs r10, r6, r10 @@ -4273,9 +6878,18 @@ fe_ge_madd: and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r3, r3, r12 adcs r5, r5, r4 adcs r6, r6, r4 @@ -4284,11 +6898,30 @@ fe_ge_madd: adcs r9, r9, r4 adcs r10, r10, r4 adc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif ldr r2, [sp, #88] ldr r1, [sp] ldr r0, [sp, #8] @@ -4306,58 +6939,124 @@ fe_ge_madd: ldr r2, [sp, #8] # Add-Sub # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2] ldr r5, [r2, #4] - ldrd r6, r7, [r0] +#else + ldrd r3, r5, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif adds r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif # Sub subs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1] + str r11, [r1, #4] +#else + strd r10, r11, [r1] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #8] ldr r5, [r2, #12] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r2, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #8] + str r11, [r1, #12] +#else + strd r10, r11, [r1, #8] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #16] ldr r5, [r2, #20] - ldrd r6, r7, [r0, #16] +#else + ldrd r3, r5, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #16] + str r11, [r1, #20] +#else + strd r10, r11, [r1, #16] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #24] ldr r5, [r2, #28] - ldrd r6, r7, [r0, #24] +#else + ldrd r3, r5, [r2, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif adds r12, r12, #-1 adcs r8, r3, r6 adc r9, r5, r7 @@ -4371,62 +7070,139 @@ fe_ge_madd: and r12, r4, r12 and lr, r4, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] +#else + ldrd r3, r5, [r0] +#endif subs r3, r3, r12 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #8] ldr r5, [r0, #12] +#else + ldrd r3, r5, [r0, #8] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #8] str r5, [r0, #12] +#else + strd r3, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #16] ldr r5, [r0, #20] +#else + ldrd r3, r5, [r0, #16] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #16] str r5, [r0, #20] +#else + strd r3, r5, [r0, #16] +#endif sbcs r8, r8, r4 sbc r9, r9, lr - strd r8, r9, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif mov r12, #-19 asr r4, r11, #31 # Mask the modulus and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] +#else + ldrd r3, r5, [r1] +#endif adds r3, r3, r12 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1] str r5, [r1, #4] +#else + strd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #8] ldr r5, [r1, #12] +#else + ldrd r3, r5, [r1, #8] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #8] str r5, [r1, #12] +#else + strd r3, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] +#else + ldrd r3, r5, [r1, #16] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #16] str r5, [r1, #20] +#else + strd r3, r5, [r1, #16] +#endif adcs r10, r10, r4 adc r11, r11, lr - strd r10, r11, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #24] + str r11, [r1, #28] +#else + strd r10, r11, [r1, #24] +#endif ldr r0, [sp, #8] ldr r1, [sp, #76] # Double +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r1, #8] - ldrd r8, r9, [r1, #16] - ldrd r10, r11, [r1, #24] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #16] + ldr r9, [r1, #20] +#else + ldrd r8, r9, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r1, #24] + ldr r11, [r1, #28] +#else + ldrd r10, r11, [r1, #24] +#endif adds r3, r3, r3 adcs r5, r5, r5 adcs r6, r6, r6 @@ -4449,67 +7225,152 @@ fe_ge_madd: sbcs r9, r9, r4 sbcs r10, r10, r4 sbc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif ldr r0, [sp, #8] ldr r1, [sp, #12] # Add-Sub # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] - ldrd r6, r7, [r1] +#else + ldrd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1] + ldr r7, [r1, #4] +#else + ldrd r6, r7, [r1] +#endif adds r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif # Sub subs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1] + str r11, [r1, #4] +#else + strd r10, r11, [r1] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #8] ldr r5, [r0, #12] - ldrd r6, r7, [r1, #8] +#else + ldrd r3, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #8] + str r11, [r1, #12] +#else + strd r10, r11, [r1, #8] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #16] ldr r5, [r0, #20] - ldrd r6, r7, [r1, #16] +#else + ldrd r3, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #16] + ldr r7, [r1, #20] +#else + ldrd r6, r7, [r1, #16] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #16] + str r11, [r1, #20] +#else + strd r10, r11, [r1, #16] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #24] ldr r5, [r0, #28] - ldrd r6, r7, [r1, #24] +#else + ldrd r3, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #24] + ldr r7, [r1, #28] +#else + ldrd r6, r7, [r1, #24] +#endif adds r12, r12, #-1 adcs r8, r3, r6 adc r9, r5, r7 @@ -4523,54 +7384,112 @@ fe_ge_madd: and r12, r4, r12 and lr, r4, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] +#else + ldrd r3, r5, [r0] +#endif subs r3, r3, r12 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #8] ldr r5, [r0, #12] +#else + ldrd r3, r5, [r0, #8] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #8] str r5, [r0, #12] +#else + strd r3, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #16] ldr r5, [r0, #20] +#else + ldrd r3, r5, [r0, #16] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #16] str r5, [r0, #20] +#else + strd r3, r5, [r0, #16] +#endif sbcs r8, r8, r4 sbc r9, r9, lr - strd r8, r9, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif mov r12, #-19 asr r4, r11, #31 # Mask the modulus and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] +#else + ldrd r3, r5, [r1] +#endif adds r3, r3, r12 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1] str r5, [r1, #4] +#else + strd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #8] ldr r5, [r1, #12] +#else + ldrd r3, r5, [r1, #8] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #8] str r5, [r1, #12] +#else + strd r3, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] +#else + ldrd r3, r5, [r1, #16] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #16] str r5, [r1, #20] +#else + strd r3, r5, [r1, #16] +#endif adcs r10, r10, r4 adc r11, r11, lr - strd r10, r11, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #24] + str r11, [r1, #28] +#else + strd r10, r11, [r1, #24] +#endif add sp, sp, #32 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size fe_ge_madd,.-fe_ge_madd @@ -4589,22 +7508,70 @@ fe_ge_msub: ldr r1, [sp, #72] ldr r2, [sp, #68] # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r1, #8] - ldrd r8, r9, [r2] - ldrd r10, r11, [r2, #8] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2] + ldr r9, [r2, #4] +#else + ldrd r8, r9, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #8] + ldr r11, [r2, #12] +#else + ldrd r10, r11, [r2, #8] +#endif adds r8, r3, r8 adcs r9, r5, r9 adcs r10, r6, r10 adcs r11, r7, r11 - strd r8, r9, [r0] - strd r10, r11, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #8] + str r11, [r0, #12] +#else + strd r10, r11, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] - ldrd r6, r7, [r1, #24] - ldrd r8, r9, [r2, #16] - ldrd r10, r11, [r2, #24] +#else + ldrd r3, r5, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #24] + ldr r7, [r1, #28] +#else + ldrd r6, r7, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2, #16] + ldr r9, [r2, #20] +#else + ldrd r8, r9, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #24] + ldr r11, [r2, #28] +#else + ldrd r10, r11, [r2, #24] +#endif adcs r8, r3, r8 adcs r9, r5, r9 adcs r10, r6, r10 @@ -4615,9 +7582,18 @@ fe_ge_msub: and r12, r4, r12 and lr, r4, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif subs r3, r3, r12 sbcs r5, r5, r4 sbcs r6, r6, r4 @@ -4626,31 +7602,98 @@ fe_ge_msub: sbcs r9, r9, r4 sbcs r10, r10, r4 sbc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif ldr r0, [sp, #4] ldr r1, [sp, #72] ldr r2, [sp, #68] # Sub +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r1, #8] - ldrd r8, r9, [r2] - ldrd r10, r11, [r2, #8] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2] + ldr r9, [r2, #4] +#else + ldrd r8, r9, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #8] + ldr r11, [r2, #12] +#else + ldrd r10, r11, [r2, #8] +#endif subs r8, r3, r8 sbcs r9, r5, r9 sbcs r10, r6, r10 sbcs r11, r7, r11 - strd r8, r9, [r0] - strd r10, r11, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #8] + str r11, [r0, #12] +#else + strd r10, r11, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] - ldrd r6, r7, [r1, #24] - ldrd r8, r9, [r2, #16] - ldrd r10, r11, [r2, #24] +#else + ldrd r3, r5, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #24] + ldr r7, [r1, #28] +#else + ldrd r6, r7, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2, #16] + ldr r9, [r2, #20] +#else + ldrd r8, r9, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #24] + ldr r11, [r2, #28] +#else + ldrd r10, r11, [r2, #24] +#endif sbcs r8, r3, r8 sbcs r9, r5, r9 sbcs r10, r6, r10 @@ -4661,9 +7704,18 @@ fe_ge_msub: and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r3, r3, r12 adcs r5, r5, r4 adcs r6, r6, r4 @@ -4672,11 +7724,30 @@ fe_ge_msub: adcs r9, r9, r4 adcs r10, r10, r4 adc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif ldr r2, [sp, #92] ldr r1, [sp] ldr r0, [sp, #8] @@ -4694,58 +7765,124 @@ fe_ge_msub: ldr r2, [sp, #8] # Add-Sub # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2] ldr r5, [r2, #4] - ldrd r6, r7, [r0] +#else + ldrd r3, r5, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif adds r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif # Sub subs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1] + str r11, [r1, #4] +#else + strd r10, r11, [r1] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #8] ldr r5, [r2, #12] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r2, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #8] + str r11, [r1, #12] +#else + strd r10, r11, [r1, #8] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #16] ldr r5, [r2, #20] - ldrd r6, r7, [r0, #16] +#else + ldrd r3, r5, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #16] + str r11, [r1, #20] +#else + strd r10, r11, [r1, #16] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #24] ldr r5, [r2, #28] - ldrd r6, r7, [r0, #24] +#else + ldrd r3, r5, [r2, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif adds r12, r12, #-1 adcs r8, r3, r6 adc r9, r5, r7 @@ -4759,62 +7896,139 @@ fe_ge_msub: and r12, r4, r12 and lr, r4, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] +#else + ldrd r3, r5, [r0] +#endif subs r3, r3, r12 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #8] ldr r5, [r0, #12] +#else + ldrd r3, r5, [r0, #8] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #8] str r5, [r0, #12] +#else + strd r3, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #16] ldr r5, [r0, #20] +#else + ldrd r3, r5, [r0, #16] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #16] str r5, [r0, #20] +#else + strd r3, r5, [r0, #16] +#endif sbcs r8, r8, r4 sbc r9, r9, lr - strd r8, r9, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif mov r12, #-19 asr r4, r11, #31 # Mask the modulus and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] +#else + ldrd r3, r5, [r1] +#endif adds r3, r3, r12 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1] str r5, [r1, #4] +#else + strd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #8] ldr r5, [r1, #12] +#else + ldrd r3, r5, [r1, #8] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #8] str r5, [r1, #12] +#else + strd r3, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] +#else + ldrd r3, r5, [r1, #16] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #16] str r5, [r1, #20] +#else + strd r3, r5, [r1, #16] +#endif adcs r10, r10, r4 adc r11, r11, lr - strd r10, r11, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #24] + str r11, [r1, #28] +#else + strd r10, r11, [r1, #24] +#endif ldr r0, [sp, #8] ldr r1, [sp, #76] # Double +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r1, #8] - ldrd r8, r9, [r1, #16] - ldrd r10, r11, [r1, #24] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #16] + ldr r9, [r1, #20] +#else + ldrd r8, r9, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r1, #24] + ldr r11, [r1, #28] +#else + ldrd r10, r11, [r1, #24] +#endif adds r3, r3, r3 adcs r5, r5, r5 adcs r6, r6, r6 @@ -4837,67 +8051,152 @@ fe_ge_msub: sbcs r9, r9, r4 sbcs r10, r10, r4 sbc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif ldr r0, [sp, #12] ldr r1, [sp, #8] # Add-Sub # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r0] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif adds r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif # Sub subs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1] + str r11, [r1, #4] +#else + strd r10, r11, [r1] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #8] ldr r5, [r1, #12] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #8] + str r11, [r1, #12] +#else + strd r10, r11, [r1, #8] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] - ldrd r6, r7, [r0, #16] +#else + ldrd r3, r5, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #16] + str r11, [r1, #20] +#else + strd r10, r11, [r1, #16] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #24] ldr r5, [r1, #28] - ldrd r6, r7, [r0, #24] +#else + ldrd r3, r5, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif adds r12, r12, #-1 adcs r8, r3, r6 adc r9, r5, r7 @@ -4911,54 +8210,112 @@ fe_ge_msub: and r12, r4, r12 and lr, r4, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] +#else + ldrd r3, r5, [r0] +#endif subs r3, r3, r12 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #8] ldr r5, [r0, #12] +#else + ldrd r3, r5, [r0, #8] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #8] str r5, [r0, #12] +#else + strd r3, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #16] ldr r5, [r0, #20] +#else + ldrd r3, r5, [r0, #16] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #16] str r5, [r0, #20] +#else + strd r3, r5, [r0, #16] +#endif sbcs r8, r8, r4 sbc r9, r9, lr - strd r8, r9, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif mov r12, #-19 asr r4, r11, #31 # Mask the modulus and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] +#else + ldrd r3, r5, [r1] +#endif adds r3, r3, r12 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1] str r5, [r1, #4] +#else + strd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #8] ldr r5, [r1, #12] +#else + ldrd r3, r5, [r1, #8] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #8] str r5, [r1, #12] +#else + strd r3, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] +#else + ldrd r3, r5, [r1, #16] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #16] str r5, [r1, #20] +#else + strd r3, r5, [r1, #16] +#endif adcs r10, r10, r4 adc r11, r11, lr - strd r10, r11, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #24] + str r11, [r1, #28] +#else + strd r10, r11, [r1, #24] +#endif add sp, sp, #32 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size fe_ge_msub,.-fe_ge_msub @@ -4977,22 +8334,70 @@ fe_ge_add: ldr r1, [sp, #136] ldr r2, [sp, #132] # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r1, #8] - ldrd r8, r9, [r2] - ldrd r10, r11, [r2, #8] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2] + ldr r9, [r2, #4] +#else + ldrd r8, r9, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #8] + ldr r11, [r2, #12] +#else + ldrd r10, r11, [r2, #8] +#endif adds r8, r3, r8 adcs r9, r5, r9 adcs r10, r6, r10 adcs r11, r7, r11 - strd r8, r9, [r0] - strd r10, r11, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #8] + str r11, [r0, #12] +#else + strd r10, r11, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] - ldrd r6, r7, [r1, #24] - ldrd r8, r9, [r2, #16] - ldrd r10, r11, [r2, #24] +#else + ldrd r3, r5, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #24] + ldr r7, [r1, #28] +#else + ldrd r6, r7, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2, #16] + ldr r9, [r2, #20] +#else + ldrd r8, r9, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #24] + ldr r11, [r2, #28] +#else + ldrd r10, r11, [r2, #24] +#endif adcs r8, r3, r8 adcs r9, r5, r9 adcs r10, r6, r10 @@ -5003,9 +8408,18 @@ fe_ge_add: and r12, r4, r12 and lr, r4, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif subs r3, r3, r12 sbcs r5, r5, r4 sbcs r6, r6, r4 @@ -5014,31 +8428,98 @@ fe_ge_add: sbcs r9, r9, r4 sbcs r10, r10, r4 sbc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif ldr r0, [sp, #4] ldr r1, [sp, #136] ldr r2, [sp, #132] # Sub +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r1, #8] - ldrd r8, r9, [r2] - ldrd r10, r11, [r2, #8] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2] + ldr r9, [r2, #4] +#else + ldrd r8, r9, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #8] + ldr r11, [r2, #12] +#else + ldrd r10, r11, [r2, #8] +#endif subs r8, r3, r8 sbcs r9, r5, r9 sbcs r10, r6, r10 sbcs r11, r7, r11 - strd r8, r9, [r0] - strd r10, r11, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #8] + str r11, [r0, #12] +#else + strd r10, r11, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] - ldrd r6, r7, [r1, #24] - ldrd r8, r9, [r2, #16] - ldrd r10, r11, [r2, #24] +#else + ldrd r3, r5, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #24] + ldr r7, [r1, #28] +#else + ldrd r6, r7, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2, #16] + ldr r9, [r2, #20] +#else + ldrd r8, r9, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #24] + ldr r11, [r2, #28] +#else + ldrd r10, r11, [r2, #24] +#endif sbcs r8, r3, r8 sbcs r9, r5, r9 sbcs r10, r6, r10 @@ -5049,9 +8530,18 @@ fe_ge_add: and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r3, r3, r12 adcs r5, r5, r4 adcs r6, r6, r4 @@ -5060,11 +8550,30 @@ fe_ge_add: adcs r9, r9, r4 adcs r10, r10, r4 adc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif ldr r2, [sp, #156] ldr r1, [sp] ldr r0, [sp, #8] @@ -5084,11 +8593,30 @@ fe_ge_add: add r0, sp, #16 ldr r1, [sp] # Double +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r1, #8] - ldrd r8, r9, [r1, #16] - ldrd r10, r11, [r1, #24] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #16] + ldr r9, [r1, #20] +#else + ldrd r8, r9, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r1, #24] + ldr r11, [r1, #28] +#else + ldrd r10, r11, [r1, #24] +#endif adds r3, r3, r3 adcs r5, r5, r5 adcs r6, r6, r6 @@ -5111,68 +8639,153 @@ fe_ge_add: sbcs r9, r9, r4 sbcs r10, r10, r4 sbc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif ldr r0, [sp, #4] ldr r1, [sp] ldr r2, [sp, #8] # Add-Sub # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2] ldr r5, [r2, #4] - ldrd r6, r7, [r0] +#else + ldrd r3, r5, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif adds r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif # Sub subs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1] + str r11, [r1, #4] +#else + strd r10, r11, [r1] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #8] ldr r5, [r2, #12] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r2, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #8] + str r11, [r1, #12] +#else + strd r10, r11, [r1, #8] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #16] ldr r5, [r2, #20] - ldrd r6, r7, [r0, #16] +#else + ldrd r3, r5, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #16] + str r11, [r1, #20] +#else + strd r10, r11, [r1, #16] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #24] ldr r5, [r2, #28] - ldrd r6, r7, [r0, #24] +#else + ldrd r3, r5, [r2, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif adds r12, r12, #-1 adcs r8, r3, r6 adc r9, r5, r7 @@ -5186,111 +8799,235 @@ fe_ge_add: and r12, r4, r12 and lr, r4, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] +#else + ldrd r3, r5, [r0] +#endif subs r3, r3, r12 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #8] ldr r5, [r0, #12] +#else + ldrd r3, r5, [r0, #8] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #8] str r5, [r0, #12] +#else + strd r3, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #16] ldr r5, [r0, #20] +#else + ldrd r3, r5, [r0, #16] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #16] str r5, [r0, #20] +#else + strd r3, r5, [r0, #16] +#endif sbcs r8, r8, r4 sbc r9, r9, lr - strd r8, r9, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif mov r12, #-19 asr r4, r11, #31 # Mask the modulus and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] +#else + ldrd r3, r5, [r1] +#endif adds r3, r3, r12 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1] str r5, [r1, #4] +#else + strd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #8] ldr r5, [r1, #12] +#else + ldrd r3, r5, [r1, #8] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #8] str r5, [r1, #12] +#else + strd r3, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] +#else + ldrd r3, r5, [r1, #16] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #16] str r5, [r1, #20] +#else + strd r3, r5, [r1, #16] +#endif adcs r10, r10, r4 adc r11, r11, lr - strd r10, r11, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #24] + str r11, [r1, #28] +#else + strd r10, r11, [r1, #24] +#endif ldr r0, [sp, #8] ldr r1, [sp, #12] add r2, sp, #16 # Add-Sub # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2] ldr r5, [r2, #4] - ldrd r6, r7, [r1] +#else + ldrd r3, r5, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1] + ldr r7, [r1, #4] +#else + ldrd r6, r7, [r1] +#endif adds r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif # Sub subs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1] + str r11, [r1, #4] +#else + strd r10, r11, [r1] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #8] ldr r5, [r2, #12] - ldrd r6, r7, [r1, #8] +#else + ldrd r3, r5, [r2, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #8] + str r11, [r1, #12] +#else + strd r10, r11, [r1, #8] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #16] ldr r5, [r2, #20] - ldrd r6, r7, [r1, #16] +#else + ldrd r3, r5, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #16] + ldr r7, [r1, #20] +#else + ldrd r6, r7, [r1, #16] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #16] + str r11, [r1, #20] +#else + strd r10, r11, [r1, #16] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #24] ldr r5, [r2, #28] - ldrd r6, r7, [r1, #24] +#else + ldrd r3, r5, [r2, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #24] + ldr r7, [r1, #28] +#else + ldrd r6, r7, [r1, #24] +#endif adds r12, r12, #-1 adcs r8, r3, r6 adc r9, r5, r7 @@ -5304,54 +9041,112 @@ fe_ge_add: and r12, r4, r12 and lr, r4, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] +#else + ldrd r3, r5, [r0] +#endif subs r3, r3, r12 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #8] ldr r5, [r0, #12] +#else + ldrd r3, r5, [r0, #8] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #8] str r5, [r0, #12] +#else + strd r3, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #16] ldr r5, [r0, #20] +#else + ldrd r3, r5, [r0, #16] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #16] str r5, [r0, #20] +#else + strd r3, r5, [r0, #16] +#endif sbcs r8, r8, r4 sbc r9, r9, lr - strd r8, r9, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif mov r12, #-19 asr r4, r11, #31 # Mask the modulus and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] +#else + ldrd r3, r5, [r1] +#endif adds r3, r3, r12 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1] str r5, [r1, #4] +#else + strd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #8] ldr r5, [r1, #12] +#else + ldrd r3, r5, [r1, #8] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #8] str r5, [r1, #12] +#else + strd r3, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] +#else + ldrd r3, r5, [r1, #16] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #16] str r5, [r1, #20] +#else + strd r3, r5, [r1, #16] +#endif adcs r10, r10, r4 adc r11, r11, lr - strd r10, r11, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #24] + str r11, [r1, #28] +#else + strd r10, r11, [r1, #24] +#endif add sp, sp, #0x60 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size fe_ge_add,.-fe_ge_add @@ -5370,22 +9165,70 @@ fe_ge_sub: ldr r1, [sp, #136] ldr r2, [sp, #132] # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r1, #8] - ldrd r8, r9, [r2] - ldrd r10, r11, [r2, #8] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2] + ldr r9, [r2, #4] +#else + ldrd r8, r9, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #8] + ldr r11, [r2, #12] +#else + ldrd r10, r11, [r2, #8] +#endif adds r8, r3, r8 adcs r9, r5, r9 adcs r10, r6, r10 adcs r11, r7, r11 - strd r8, r9, [r0] - strd r10, r11, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #8] + str r11, [r0, #12] +#else + strd r10, r11, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] - ldrd r6, r7, [r1, #24] - ldrd r8, r9, [r2, #16] - ldrd r10, r11, [r2, #24] +#else + ldrd r3, r5, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #24] + ldr r7, [r1, #28] +#else + ldrd r6, r7, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2, #16] + ldr r9, [r2, #20] +#else + ldrd r8, r9, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #24] + ldr r11, [r2, #28] +#else + ldrd r10, r11, [r2, #24] +#endif adcs r8, r3, r8 adcs r9, r5, r9 adcs r10, r6, r10 @@ -5396,9 +9239,18 @@ fe_ge_sub: and r12, r4, r12 and lr, r4, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif subs r3, r3, r12 sbcs r5, r5, r4 sbcs r6, r6, r4 @@ -5407,31 +9259,98 @@ fe_ge_sub: sbcs r9, r9, r4 sbcs r10, r10, r4 sbc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif ldr r0, [sp, #4] ldr r1, [sp, #136] ldr r2, [sp, #132] # Sub +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r1, #8] - ldrd r8, r9, [r2] - ldrd r10, r11, [r2, #8] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2] + ldr r9, [r2, #4] +#else + ldrd r8, r9, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #8] + ldr r11, [r2, #12] +#else + ldrd r10, r11, [r2, #8] +#endif subs r8, r3, r8 sbcs r9, r5, r9 sbcs r10, r6, r10 sbcs r11, r7, r11 - strd r8, r9, [r0] - strd r10, r11, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #8] + str r11, [r0, #12] +#else + strd r10, r11, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] - ldrd r6, r7, [r1, #24] - ldrd r8, r9, [r2, #16] - ldrd r10, r11, [r2, #24] +#else + ldrd r3, r5, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #24] + ldr r7, [r1, #28] +#else + ldrd r6, r7, [r1, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r2, #16] + ldr r9, [r2, #20] +#else + ldrd r8, r9, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r2, #24] + ldr r11, [r2, #28] +#else + ldrd r10, r11, [r2, #24] +#endif sbcs r8, r3, r8 sbcs r9, r5, r9 sbcs r10, r6, r10 @@ -5442,9 +9361,18 @@ fe_ge_sub: and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r3, r3, r12 adcs r5, r5, r4 adcs r6, r6, r4 @@ -5453,11 +9381,30 @@ fe_ge_sub: adcs r9, r9, r4 adcs r10, r10, r4 adc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif ldr r2, [sp, #160] ldr r1, [sp] ldr r0, [sp, #8] @@ -5477,11 +9424,30 @@ fe_ge_sub: add r0, sp, #16 ldr r1, [sp] # Double +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] - ldrd r6, r7, [r1, #8] - ldrd r8, r9, [r1, #16] - ldrd r10, r11, [r1, #24] +#else + ldrd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #8] + ldr r7, [r1, #12] +#else + ldrd r6, r7, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #16] + ldr r9, [r1, #20] +#else + ldrd r8, r9, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r10, [r1, #24] + ldr r11, [r1, #28] +#else + ldrd r10, r11, [r1, #24] +#endif adds r3, r3, r3 adcs r5, r5, r5 adcs r6, r6, r6 @@ -5504,68 +9470,153 @@ fe_ge_sub: sbcs r9, r9, r4 sbcs r10, r10, r4 sbc r11, r11, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] - strd r6, r7, [r0, #8] - strd r8, r9, [r0, #16] - strd r10, r11, [r0, #24] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r0, #24] + str r11, [r0, #28] +#else + strd r10, r11, [r0, #24] +#endif ldr r0, [sp, #4] ldr r1, [sp] ldr r2, [sp, #8] # Add-Sub # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2] ldr r5, [r2, #4] - ldrd r6, r7, [r0] +#else + ldrd r3, r5, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif adds r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif # Sub subs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1] + str r11, [r1, #4] +#else + strd r10, r11, [r1] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #8] ldr r5, [r2, #12] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r2, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #8] + str r11, [r1, #12] +#else + strd r10, r11, [r1, #8] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #16] ldr r5, [r2, #20] - ldrd r6, r7, [r0, #16] +#else + ldrd r3, r5, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #16] + str r11, [r1, #20] +#else + strd r10, r11, [r1, #16] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #24] ldr r5, [r2, #28] - ldrd r6, r7, [r0, #24] +#else + ldrd r3, r5, [r2, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif adds r12, r12, #-1 adcs r8, r3, r6 adc r9, r5, r7 @@ -5579,111 +9630,235 @@ fe_ge_sub: and r12, r4, r12 and lr, r4, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] +#else + ldrd r3, r5, [r0] +#endif subs r3, r3, r12 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #8] ldr r5, [r0, #12] +#else + ldrd r3, r5, [r0, #8] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #8] str r5, [r0, #12] +#else + strd r3, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #16] ldr r5, [r0, #20] +#else + ldrd r3, r5, [r0, #16] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #16] str r5, [r0, #20] +#else + strd r3, r5, [r0, #16] +#endif sbcs r8, r8, r4 sbc r9, r9, lr - strd r8, r9, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif mov r12, #-19 asr r4, r11, #31 # Mask the modulus and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] +#else + ldrd r3, r5, [r1] +#endif adds r3, r3, r12 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1] str r5, [r1, #4] +#else + strd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #8] ldr r5, [r1, #12] +#else + ldrd r3, r5, [r1, #8] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #8] str r5, [r1, #12] +#else + strd r3, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] +#else + ldrd r3, r5, [r1, #16] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #16] str r5, [r1, #20] +#else + strd r3, r5, [r1, #16] +#endif adcs r10, r10, r4 adc r11, r11, lr - strd r10, r11, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #24] + str r11, [r1, #28] +#else + strd r10, r11, [r1, #24] +#endif ldr r0, [sp, #12] ldr r1, [sp, #8] add r2, sp, #16 # Add-Sub # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2] ldr r5, [r2, #4] - ldrd r6, r7, [r0] +#else + ldrd r3, r5, [r2] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif adds r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0] + str r9, [r0, #4] +#else + strd r8, r9, [r0] +#endif # Sub subs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1] + str r11, [r1, #4] +#else + strd r10, r11, [r1] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #8] ldr r5, [r2, #12] - ldrd r6, r7, [r0, #8] +#else + ldrd r3, r5, [r2, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #8] + str r9, [r0, #12] +#else + strd r8, r9, [r0, #8] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #8] + str r11, [r1, #12] +#else + strd r10, r11, [r1, #8] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #16] ldr r5, [r2, #20] - ldrd r6, r7, [r0, #16] +#else + ldrd r3, r5, [r2, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif adds r12, r12, #-1 adcs r8, r3, r6 mov r12, #0 adcs r9, r5, r7 adc r12, r12, #0 - strd r8, r9, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #16] + str r9, [r0, #20] +#else + strd r8, r9, [r0, #16] +#endif # Sub adds lr, lr, #-1 sbcs r10, r3, r6 mov lr, #0 sbcs r11, r5, r7 adc lr, lr, #0 - strd r10, r11, [r1, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #16] + str r11, [r1, #20] +#else + strd r10, r11, [r1, #16] +#endif # Add +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r2, #24] ldr r5, [r2, #28] - ldrd r6, r7, [r0, #24] +#else + ldrd r3, r5, [r2, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif adds r12, r12, #-1 adcs r8, r3, r6 adc r9, r5, r7 @@ -5697,54 +9872,112 @@ fe_ge_sub: and r12, r4, r12 and lr, r4, #0x7fffffff # Sub modulus (if overflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0] ldr r5, [r0, #4] +#else + ldrd r3, r5, [r0] +#endif subs r3, r3, r12 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0] str r5, [r0, #4] +#else + strd r3, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #8] ldr r5, [r0, #12] +#else + ldrd r3, r5, [r0, #8] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #8] str r5, [r0, #12] +#else + strd r3, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r0, #16] ldr r5, [r0, #20] +#else + ldrd r3, r5, [r0, #16] +#endif sbcs r3, r3, r4 sbcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r0, #16] str r5, [r0, #20] +#else + strd r3, r5, [r0, #16] +#endif sbcs r8, r8, r4 sbc r9, r9, lr - strd r8, r9, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r0, #24] + str r9, [r0, #28] +#else + strd r8, r9, [r0, #24] +#endif mov r12, #-19 asr r4, r11, #31 # Mask the modulus and r12, r4, r12 and lr, r4, #0x7fffffff # Add modulus (if underflow) +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1] ldr r5, [r1, #4] +#else + ldrd r3, r5, [r1] +#endif adds r3, r3, r12 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1] str r5, [r1, #4] +#else + strd r3, r5, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #8] ldr r5, [r1, #12] +#else + ldrd r3, r5, [r1, #8] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #8] str r5, [r1, #12] +#else + strd r3, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r3, [r1, #16] ldr r5, [r1, #20] +#else + ldrd r3, r5, [r1, #16] +#endif adcs r3, r3, r4 adcs r5, r5, r4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r3, [r1, #16] str r5, [r1, #20] +#else + strd r3, r5, [r1, #16] +#endif adcs r10, r10, r4 adc r11, r11, lr - strd r10, r11, [r1, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r10, [r1, #24] + str r11, [r1, #28] +#else + strd r10, r11, [r1, #24] +#endif add sp, sp, #0x60 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size fe_ge_sub,.-fe_ge_sub diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c index 485643554..4600aece9 100644 --- a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c @@ -54,15 +54,55 @@ void fe_frombytes(fe out_p, const unsigned char* in_p) register const unsigned char* in asm ("r1") = in_p; __asm__ __volatile__ ( +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[in]]\n\t" + "ldr r3, [%[in], #4]\n\t" +#else "ldrd r2, r3, [%[in]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[in], #8]\n\t" + "ldr lr, [%[in], #12]\n\t" +#else "ldrd r12, lr, [%[in], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[in], #16]\n\t" + "ldr r5, [%[in], #20]\n\t" +#else "ldrd r4, r5, [%[in], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[in], #24]\n\t" + "ldr r7, [%[in], #28]\n\t" +#else "ldrd r6, r7, [%[in], #24]\n\t" +#endif "and r7, r7, #0x7fffffff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r2, [%[out]]\n\t" + "str r3, [%[out], #4]\n\t" +#else "strd r2, r3, [%[out]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[out], #8]\n\t" + "str lr, [%[out], #12]\n\t" +#else "strd r12, lr, [%[out], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[out], #16]\n\t" + "str r5, [%[out], #20]\n\t" +#else "strd r4, r5, [%[out], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[out], #24]\n\t" + "str r7, [%[out], #28]\n\t" +#else "strd r6, r7, [%[out], #24]\n\t" +#endif : [out] "+r" (out), [in] "+r" (in) : : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7" @@ -75,10 +115,30 @@ void fe_tobytes(unsigned char* out_p, const fe n_p) register const fe n asm ("r1") = n_p; __asm__ __volatile__ ( +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[n]]\n\t" + "ldr r3, [%[n], #4]\n\t" +#else "ldrd r2, r3, [%[n]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[n], #8]\n\t" + "ldr lr, [%[n], #12]\n\t" +#else "ldrd r12, lr, [%[n], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[n], #16]\n\t" + "ldr r5, [%[n], #20]\n\t" +#else "ldrd r4, r5, [%[n], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[n], #24]\n\t" + "ldr r7, [%[n], #28]\n\t" +#else "ldrd r6, r7, [%[n], #24]\n\t" +#endif "adds r8, r2, #19\n\t" "adcs r8, r3, #0\n\t" "adcs r8, r12, #0\n\t" @@ -98,10 +158,30 @@ void fe_tobytes(unsigned char* out_p, const fe n_p) "adcs r6, r6, #0\n\t" "adc r7, r7, #0\n\t" "and r7, r7, #0x7fffffff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r2, [%[out]]\n\t" + "str r3, [%[out], #4]\n\t" +#else "strd r2, r3, [%[out]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[out], #8]\n\t" + "str lr, [%[out], #12]\n\t" +#else "strd r12, lr, [%[out], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[out], #16]\n\t" + "str r5, [%[out], #20]\n\t" +#else "strd r4, r5, [%[out], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[out], #24]\n\t" + "str r7, [%[out], #28]\n\t" +#else "strd r6, r7, [%[out], #24]\n\t" +#endif : [out] "+r" (out), [n] "+r" (n) : : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" @@ -116,10 +196,30 @@ void fe_1(fe n_p) /* Set one */ "mov r2, #1\n\t" "mov r1, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r2, [%[n]]\n\t" + "str r1, [%[n], #4]\n\t" +#else "strd r2, r1, [%[n]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r1, [%[n], #8]\n\t" + "str r1, [%[n], #12]\n\t" +#else "strd r1, r1, [%[n], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r1, [%[n], #16]\n\t" + "str r1, [%[n], #20]\n\t" +#else "strd r1, r1, [%[n], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r1, [%[n], #24]\n\t" + "str r1, [%[n], #28]\n\t" +#else "strd r1, r1, [%[n], #24]\n\t" +#endif : [n] "+r" (n) : : "memory", "r1", "r2" @@ -133,10 +233,30 @@ void fe_0(fe n_p) __asm__ __volatile__ ( /* Set zero */ "mov r1, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r1, [%[n]]\n\t" + "str r1, [%[n], #4]\n\t" +#else "strd r1, r1, [%[n]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r1, [%[n], #8]\n\t" + "str r1, [%[n], #12]\n\t" +#else "strd r1, r1, [%[n], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r1, [%[n], #16]\n\t" + "str r1, [%[n], #20]\n\t" +#else "strd r1, r1, [%[n], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r1, [%[n], #24]\n\t" + "str r1, [%[n], #28]\n\t" +#else "strd r1, r1, [%[n], #24]\n\t" +#endif : [n] "+r" (n) : : "memory", "r1" @@ -150,14 +270,54 @@ void fe_copy(fe r_p, const fe a_p) __asm__ __volatile__ ( /* Copy */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" +#else "ldrd r2, r3, [%[a]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[a], #12]\n\t" +#else "ldrd r12, lr, [%[a], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r2, [%[r]]\n\t" + "str r3, [%[r], #4]\n\t" +#else "strd r2, r3, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[r], #8]\n\t" + "str lr, [%[r], #12]\n\t" +#else "strd r12, lr, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" +#else "ldrd r2, r3, [%[a], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[a], #28]\n\t" +#else "ldrd r12, lr, [%[a], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r2, [%[r], #16]\n\t" + "str r3, [%[r], #20]\n\t" +#else "strd r2, r3, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[r], #24]\n\t" + "str lr, [%[r], #28]\n\t" +#else "strd r12, lr, [%[r], #24]\n\t" +#endif : [r] "+r" (r), [a] "+r" (a) : : "memory", "r2", "r3", "r12", "lr" @@ -172,20 +332,70 @@ void fe_sub(fe r_p, const fe a_p, const fe b_p) __asm__ __volatile__ ( /* Sub */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" +#else "ldrd r12, lr, [%[a]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" +#else "ldrd r4, r5, [%[a], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[b]]\n\t" + "ldr r7, [%[b], #4]\n\t" +#else "ldrd r6, r7, [%[b]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[b], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" +#else "ldrd r8, r9, [%[b], #8]\n\t" +#endif "subs r6, r12, r6\n\t" "sbcs r7, lr, r7\n\t" "sbcs r8, r4, r8\n\t" "sbcs r9, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[r]]\n\t" + "str r7, [%[r], #4]\n\t" +#else "strd r6, r7, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #8]\n\t" + "str r9, [%[r], #12]\n\t" +#else "strd r8, r9, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[a], #20]\n\t" +#else "ldrd r12, lr, [%[a], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" +#else "ldrd r4, r5, [%[a], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" +#else "ldrd r6, r7, [%[b], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[b], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" +#else "ldrd r8, r9, [%[b], #24]\n\t" +#endif "sbcs r6, r12, r6\n\t" "sbcs r7, lr, r7\n\t" "sbcs r8, r4, r8\n\t" @@ -196,8 +406,18 @@ void fe_sub(fe r_p, const fe a_p, const fe b_p) "and r10, r3, r10\n\t" "and r11, r3, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[r]]\n\t" + "ldr lr, [%[r], #4]\n\t" +#else "ldrd r12, lr, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r], #8]\n\t" + "ldr r5, [%[r], #12]\n\t" +#else "ldrd r4, r5, [%[r], #8]\n\t" +#endif "adds r12, r12, r10\n\t" "adcs lr, lr, r3\n\t" "adcs r4, r4, r3\n\t" @@ -206,10 +426,30 @@ void fe_sub(fe r_p, const fe a_p, const fe b_p) "adcs r7, r7, r3\n\t" "adcs r8, r8, r3\n\t" "adc r9, r9, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[r]]\n\t" + "str lr, [%[r], #4]\n\t" +#else "strd r12, lr, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" +#else "strd r4, r5, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[r], #16]\n\t" + "str r7, [%[r], #20]\n\t" +#else "strd r6, r7, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #24]\n\t" + "str r9, [%[r], #28]\n\t" +#else "strd r8, r9, [%[r], #24]\n\t" +#endif : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -224,20 +464,70 @@ void fe_add(fe r_p, const fe a_p, const fe b_p) __asm__ __volatile__ ( /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" +#else "ldrd r12, lr, [%[a]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" +#else "ldrd r4, r5, [%[a], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[b]]\n\t" + "ldr r7, [%[b], #4]\n\t" +#else "ldrd r6, r7, [%[b]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[b], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" +#else "ldrd r8, r9, [%[b], #8]\n\t" +#endif "adds r6, r12, r6\n\t" "adcs r7, lr, r7\n\t" "adcs r8, r4, r8\n\t" "adcs r9, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[r]]\n\t" + "str r7, [%[r], #4]\n\t" +#else "strd r6, r7, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #8]\n\t" + "str r9, [%[r], #12]\n\t" +#else "strd r8, r9, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[a], #20]\n\t" +#else "ldrd r12, lr, [%[a], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" +#else "ldrd r4, r5, [%[a], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" +#else "ldrd r6, r7, [%[b], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[b], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" +#else "ldrd r8, r9, [%[b], #24]\n\t" +#endif "adcs r6, r12, r6\n\t" "adcs r7, lr, r7\n\t" "adcs r8, r4, r8\n\t" @@ -248,8 +538,18 @@ void fe_add(fe r_p, const fe a_p, const fe b_p) "and r10, r3, r10\n\t" "and r11, r3, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[r]]\n\t" + "ldr lr, [%[r], #4]\n\t" +#else "ldrd r12, lr, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r], #8]\n\t" + "ldr r5, [%[r], #12]\n\t" +#else "ldrd r4, r5, [%[r], #8]\n\t" +#endif "subs r12, r12, r10\n\t" "sbcs lr, lr, r3\n\t" "sbcs r4, r4, r3\n\t" @@ -258,10 +558,30 @@ void fe_add(fe r_p, const fe a_p, const fe b_p) "sbcs r7, r7, r3\n\t" "sbcs r8, r8, r3\n\t" "sbc r9, r9, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[r]]\n\t" + "str lr, [%[r], #4]\n\t" +#else "strd r12, lr, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" +#else "strd r4, r5, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[r], #16]\n\t" + "str r7, [%[r], #20]\n\t" +#else "strd r6, r7, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #24]\n\t" + "str r9, [%[r], #28]\n\t" +#else "strd r8, r9, [%[r], #24]\n\t" +#endif : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -276,23 +596,69 @@ void fe_neg(fe r_p, const fe a_p) __asm__ __volatile__ ( "mov r5, #-1\n\t" "mov r4, #-19\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" +#else "ldrd r2, r3, [%[a]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[a], #12]\n\t" +#else "ldrd r12, lr, [%[a], #8]\n\t" +#endif "subs r2, r4, r2\n\t" "sbcs r3, r5, r3\n\t" "sbcs r12, r5, r12\n\t" "sbcs lr, r5, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r2, [%[r]]\n\t" + "str r3, [%[r], #4]\n\t" +#else "strd r2, r3, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[r], #8]\n\t" + "str lr, [%[r], #12]\n\t" +#else "strd r12, lr, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r4, #0x7fffff\n\t" + "lsl r4, r4, #8\n\t" + "add r4, r4, #0xff\n\t" +#else "mov r4, #0x7fffffff\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" +#else "ldrd r2, r3, [%[a], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[a], #28]\n\t" +#else "ldrd r12, lr, [%[a], #24]\n\t" +#endif "sbcs r2, r5, r2\n\t" "sbcs r3, r5, r3\n\t" "sbcs r12, r5, r12\n\t" "sbc lr, r4, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r2, [%[r], #16]\n\t" + "str r3, [%[r], #20]\n\t" +#else "strd r2, r3, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[r], #24]\n\t" + "str lr, [%[r], #28]\n\t" +#else "strd r12, lr, [%[r], #24]\n\t" +#endif : [r] "+r" (r), [a] "+r" (a) : : "memory", "r2", "r3", "r12", "lr", "r4", "r5" @@ -304,10 +670,30 @@ int fe_isnonzero(const fe a_p) register const fe a asm ("r0") = a_p; __asm__ __volatile__ ( +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" +#else "ldrd r2, r3, [%[a]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[a], #12]\n\t" +#else "ldrd r12, lr, [%[a], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" +#else "ldrd r4, r5, [%[a], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" +#else "ldrd r6, r7, [%[a], #24]\n\t" +#endif "adds r1, r2, #19\n\t" "adcs r1, r3, #0\n\t" "adcs r1, r12, #0\n\t" @@ -346,14 +732,34 @@ int fe_isnegative(const fe a_p) register const fe a asm ("r0") = a_p; __asm__ __volatile__ ( +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" +#else "ldrd r2, r3, [%[a]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[a], #12]\n\t" +#else "ldrd r12, lr, [%[a], #8]\n\t" +#endif "adds r1, r2, #19\n\t" "adcs r1, r3, #0\n\t" "adcs r1, r12, #0\n\t" "adcs r1, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" +#else "ldrd r2, r3, [%[a], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[a], #28]\n\t" +#else "ldrd r12, lr, [%[a], #24]\n\t" +#endif "adcs r1, r2, #0\n\t" "adcs r1, r3, #0\n\t" "adcs r1, r12, #0\n\t" @@ -386,25 +792,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "mov r4, #0\n\t" "mov r5, #0\n\t" "mov r6, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #31\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base]]\n\t" + "ldr r9, [%[base], #4]\n\t" +#else "ldrd r8, r9, [%[base]]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #32]\n\t" + "ldr r9, [%[base], #36]\n\t" +#else "ldrd r8, r9, [%[base], #32]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #64]\n\t" + "ldr r9, [%[base], #68]\n\t" +#else "ldrd r8, r9, [%[base], #64]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -412,25 +839,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #30\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base]]\n\t" + "ldr r9, [%[base], #4]\n\t" +#else "ldrd r8, r9, [%[base]]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #32]\n\t" + "ldr r9, [%[base], #36]\n\t" +#else "ldrd r8, r9, [%[base], #32]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #64]\n\t" + "ldr r9, [%[base], #68]\n\t" +#else "ldrd r8, r9, [%[base], #64]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -438,25 +886,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #29\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base]]\n\t" + "ldr r9, [%[base], #4]\n\t" +#else "ldrd r8, r9, [%[base]]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #32]\n\t" + "ldr r9, [%[base], #36]\n\t" +#else "ldrd r8, r9, [%[base], #32]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #64]\n\t" + "ldr r9, [%[base], #68]\n\t" +#else "ldrd r8, r9, [%[base], #64]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -464,25 +933,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #28\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base]]\n\t" + "ldr r9, [%[base], #4]\n\t" +#else "ldrd r8, r9, [%[base]]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #32]\n\t" + "ldr r9, [%[base], #36]\n\t" +#else "ldrd r8, r9, [%[base], #32]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #64]\n\t" + "ldr r9, [%[base], #68]\n\t" +#else "ldrd r8, r9, [%[base], #64]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -490,25 +980,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #27\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base]]\n\t" + "ldr r9, [%[base], #4]\n\t" +#else "ldrd r8, r9, [%[base]]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #32]\n\t" + "ldr r9, [%[base], #36]\n\t" +#else "ldrd r8, r9, [%[base], #32]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #64]\n\t" + "ldr r9, [%[base], #68]\n\t" +#else "ldrd r8, r9, [%[base], #64]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -516,25 +1027,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #26\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base]]\n\t" + "ldr r9, [%[base], #4]\n\t" +#else "ldrd r8, r9, [%[base]]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #32]\n\t" + "ldr r9, [%[base], #36]\n\t" +#else "ldrd r8, r9, [%[base], #32]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #64]\n\t" + "ldr r9, [%[base], #68]\n\t" +#else "ldrd r8, r9, [%[base], #64]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -542,25 +1074,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #25\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base]]\n\t" + "ldr r9, [%[base], #4]\n\t" +#else "ldrd r8, r9, [%[base]]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #32]\n\t" + "ldr r9, [%[base], #36]\n\t" +#else "ldrd r8, r9, [%[base], #32]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #64]\n\t" + "ldr r9, [%[base], #68]\n\t" +#else "ldrd r8, r9, [%[base], #64]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -568,25 +1121,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #24\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base]]\n\t" + "ldr r9, [%[base], #4]\n\t" +#else "ldrd r8, r9, [%[base]]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #32]\n\t" + "ldr r9, [%[base], #36]\n\t" +#else "ldrd r8, r9, [%[base], #32]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #64]\n\t" + "ldr r9, [%[base], #68]\n\t" +#else "ldrd r8, r9, [%[base], #64]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -614,9 +1188,24 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r9, r9, r6\n\t" "and r9, r9, r10\n\t" "eor r6, r6, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r3, [%[r]]\n\t" + "str r12, [%[r], #4]\n\t" +#else "strd r3, r12, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str lr, [%[r], #32]\n\t" + "str r4, [%[r], #36]\n\t" +#else "strd lr, r4, [%[r], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r5, [%[r], #64]\n\t" + "str r6, [%[r], #68]\n\t" +#else "strd r5, r6, [%[r], #64]\n\t" +#endif "sbfx r7, %[b], #7, #1\n\t" "eor r10, %[b], r7\n\t" "sub r10, r10, r7\n\t" @@ -626,25 +1215,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "mov r4, #0\n\t" "mov r5, #0\n\t" "mov r6, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #31\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #8]\n\t" + "ldr r9, [%[base], #12]\n\t" +#else "ldrd r8, r9, [%[base], #8]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #40]\n\t" + "ldr r9, [%[base], #44]\n\t" +#else "ldrd r8, r9, [%[base], #40]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #72]\n\t" + "ldr r9, [%[base], #76]\n\t" +#else "ldrd r8, r9, [%[base], #72]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -652,25 +1262,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #30\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #8]\n\t" + "ldr r9, [%[base], #12]\n\t" +#else "ldrd r8, r9, [%[base], #8]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #40]\n\t" + "ldr r9, [%[base], #44]\n\t" +#else "ldrd r8, r9, [%[base], #40]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #72]\n\t" + "ldr r9, [%[base], #76]\n\t" +#else "ldrd r8, r9, [%[base], #72]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -678,25 +1309,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #29\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #8]\n\t" + "ldr r9, [%[base], #12]\n\t" +#else "ldrd r8, r9, [%[base], #8]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #40]\n\t" + "ldr r9, [%[base], #44]\n\t" +#else "ldrd r8, r9, [%[base], #40]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #72]\n\t" + "ldr r9, [%[base], #76]\n\t" +#else "ldrd r8, r9, [%[base], #72]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -704,25 +1356,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #28\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #8]\n\t" + "ldr r9, [%[base], #12]\n\t" +#else "ldrd r8, r9, [%[base], #8]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #40]\n\t" + "ldr r9, [%[base], #44]\n\t" +#else "ldrd r8, r9, [%[base], #40]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #72]\n\t" + "ldr r9, [%[base], #76]\n\t" +#else "ldrd r8, r9, [%[base], #72]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -730,25 +1403,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #27\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #8]\n\t" + "ldr r9, [%[base], #12]\n\t" +#else "ldrd r8, r9, [%[base], #8]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #40]\n\t" + "ldr r9, [%[base], #44]\n\t" +#else "ldrd r8, r9, [%[base], #40]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #72]\n\t" + "ldr r9, [%[base], #76]\n\t" +#else "ldrd r8, r9, [%[base], #72]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -756,25 +1450,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #26\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #8]\n\t" + "ldr r9, [%[base], #12]\n\t" +#else "ldrd r8, r9, [%[base], #8]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #40]\n\t" + "ldr r9, [%[base], #44]\n\t" +#else "ldrd r8, r9, [%[base], #40]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #72]\n\t" + "ldr r9, [%[base], #76]\n\t" +#else "ldrd r8, r9, [%[base], #72]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -782,25 +1497,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #25\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #8]\n\t" + "ldr r9, [%[base], #12]\n\t" +#else "ldrd r8, r9, [%[base], #8]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #40]\n\t" + "ldr r9, [%[base], #44]\n\t" +#else "ldrd r8, r9, [%[base], #40]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #72]\n\t" + "ldr r9, [%[base], #76]\n\t" +#else "ldrd r8, r9, [%[base], #72]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -808,25 +1544,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #24\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #8]\n\t" + "ldr r9, [%[base], #12]\n\t" +#else "ldrd r8, r9, [%[base], #8]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #40]\n\t" + "ldr r9, [%[base], #44]\n\t" +#else "ldrd r8, r9, [%[base], #40]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #72]\n\t" + "ldr r9, [%[base], #76]\n\t" +#else "ldrd r8, r9, [%[base], #72]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -855,9 +1612,24 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r9, r9, r6\n\t" "and r9, r9, r10\n\t" "eor r6, r6, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r3, [%[r], #8]\n\t" + "str r12, [%[r], #12]\n\t" +#else "strd r3, r12, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str lr, [%[r], #40]\n\t" + "str r4, [%[r], #44]\n\t" +#else "strd lr, r4, [%[r], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r5, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" +#else "strd r5, r6, [%[r], #72]\n\t" +#endif "sbfx r7, %[b], #7, #1\n\t" "eor r10, %[b], r7\n\t" "sub r10, r10, r7\n\t" @@ -867,25 +1639,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "mov r4, #0\n\t" "mov r5, #0\n\t" "mov r6, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #31\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #16]\n\t" + "ldr r9, [%[base], #20]\n\t" +#else "ldrd r8, r9, [%[base], #16]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #48]\n\t" + "ldr r9, [%[base], #52]\n\t" +#else "ldrd r8, r9, [%[base], #48]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #80]\n\t" + "ldr r9, [%[base], #84]\n\t" +#else "ldrd r8, r9, [%[base], #80]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -893,25 +1686,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #30\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #16]\n\t" + "ldr r9, [%[base], #20]\n\t" +#else "ldrd r8, r9, [%[base], #16]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #48]\n\t" + "ldr r9, [%[base], #52]\n\t" +#else "ldrd r8, r9, [%[base], #48]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #80]\n\t" + "ldr r9, [%[base], #84]\n\t" +#else "ldrd r8, r9, [%[base], #80]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -919,25 +1733,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #29\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #16]\n\t" + "ldr r9, [%[base], #20]\n\t" +#else "ldrd r8, r9, [%[base], #16]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #48]\n\t" + "ldr r9, [%[base], #52]\n\t" +#else "ldrd r8, r9, [%[base], #48]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #80]\n\t" + "ldr r9, [%[base], #84]\n\t" +#else "ldrd r8, r9, [%[base], #80]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -945,25 +1780,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #28\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #16]\n\t" + "ldr r9, [%[base], #20]\n\t" +#else "ldrd r8, r9, [%[base], #16]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #48]\n\t" + "ldr r9, [%[base], #52]\n\t" +#else "ldrd r8, r9, [%[base], #48]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #80]\n\t" + "ldr r9, [%[base], #84]\n\t" +#else "ldrd r8, r9, [%[base], #80]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -971,25 +1827,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #27\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #16]\n\t" + "ldr r9, [%[base], #20]\n\t" +#else "ldrd r8, r9, [%[base], #16]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #48]\n\t" + "ldr r9, [%[base], #52]\n\t" +#else "ldrd r8, r9, [%[base], #48]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #80]\n\t" + "ldr r9, [%[base], #84]\n\t" +#else "ldrd r8, r9, [%[base], #80]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -997,25 +1874,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #26\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #16]\n\t" + "ldr r9, [%[base], #20]\n\t" +#else "ldrd r8, r9, [%[base], #16]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #48]\n\t" + "ldr r9, [%[base], #52]\n\t" +#else "ldrd r8, r9, [%[base], #48]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #80]\n\t" + "ldr r9, [%[base], #84]\n\t" +#else "ldrd r8, r9, [%[base], #80]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -1023,25 +1921,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #25\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #16]\n\t" + "ldr r9, [%[base], #20]\n\t" +#else "ldrd r8, r9, [%[base], #16]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #48]\n\t" + "ldr r9, [%[base], #52]\n\t" +#else "ldrd r8, r9, [%[base], #48]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #80]\n\t" + "ldr r9, [%[base], #84]\n\t" +#else "ldrd r8, r9, [%[base], #80]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -1049,25 +1968,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #24\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #16]\n\t" + "ldr r9, [%[base], #20]\n\t" +#else "ldrd r8, r9, [%[base], #16]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #48]\n\t" + "ldr r9, [%[base], #52]\n\t" +#else "ldrd r8, r9, [%[base], #48]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #80]\n\t" + "ldr r9, [%[base], #84]\n\t" +#else "ldrd r8, r9, [%[base], #80]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -1096,9 +2036,24 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r9, r9, r6\n\t" "and r9, r9, r10\n\t" "eor r6, r6, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r3, [%[r], #16]\n\t" + "str r12, [%[r], #20]\n\t" +#else "strd r3, r12, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str lr, [%[r], #48]\n\t" + "str r4, [%[r], #52]\n\t" +#else "strd lr, r4, [%[r], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r5, [%[r], #80]\n\t" + "str r6, [%[r], #84]\n\t" +#else "strd r5, r6, [%[r], #80]\n\t" +#endif "sbfx r7, %[b], #7, #1\n\t" "eor r10, %[b], r7\n\t" "sub r10, r10, r7\n\t" @@ -1108,25 +2063,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "mov r4, #0\n\t" "mov r5, #0\n\t" "mov r6, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #31\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #24]\n\t" + "ldr r9, [%[base], #28]\n\t" +#else "ldrd r8, r9, [%[base], #24]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #56]\n\t" + "ldr r9, [%[base], #60]\n\t" +#else "ldrd r8, r9, [%[base], #56]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #88]\n\t" + "ldr r9, [%[base], #92]\n\t" +#else "ldrd r8, r9, [%[base], #88]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -1134,25 +2110,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #30\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #24]\n\t" + "ldr r9, [%[base], #28]\n\t" +#else "ldrd r8, r9, [%[base], #24]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #56]\n\t" + "ldr r9, [%[base], #60]\n\t" +#else "ldrd r8, r9, [%[base], #56]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #88]\n\t" + "ldr r9, [%[base], #92]\n\t" +#else "ldrd r8, r9, [%[base], #88]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -1160,25 +2157,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #29\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #24]\n\t" + "ldr r9, [%[base], #28]\n\t" +#else "ldrd r8, r9, [%[base], #24]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #56]\n\t" + "ldr r9, [%[base], #60]\n\t" +#else "ldrd r8, r9, [%[base], #56]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #88]\n\t" + "ldr r9, [%[base], #92]\n\t" +#else "ldrd r8, r9, [%[base], #88]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -1186,25 +2204,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #28\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #24]\n\t" + "ldr r9, [%[base], #28]\n\t" +#else "ldrd r8, r9, [%[base], #24]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #56]\n\t" + "ldr r9, [%[base], #60]\n\t" +#else "ldrd r8, r9, [%[base], #56]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #88]\n\t" + "ldr r9, [%[base], #92]\n\t" +#else "ldrd r8, r9, [%[base], #88]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -1212,25 +2251,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #27\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #24]\n\t" + "ldr r9, [%[base], #28]\n\t" +#else "ldrd r8, r9, [%[base], #24]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #56]\n\t" + "ldr r9, [%[base], #60]\n\t" +#else "ldrd r8, r9, [%[base], #56]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #88]\n\t" + "ldr r9, [%[base], #92]\n\t" +#else "ldrd r8, r9, [%[base], #88]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -1238,25 +2298,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #26\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #24]\n\t" + "ldr r9, [%[base], #28]\n\t" +#else "ldrd r8, r9, [%[base], #24]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #56]\n\t" + "ldr r9, [%[base], #60]\n\t" +#else "ldrd r8, r9, [%[base], #56]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #88]\n\t" + "ldr r9, [%[base], #92]\n\t" +#else "ldrd r8, r9, [%[base], #88]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -1264,25 +2345,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #25\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #24]\n\t" + "ldr r9, [%[base], #28]\n\t" +#else "ldrd r8, r9, [%[base], #24]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #56]\n\t" + "ldr r9, [%[base], #60]\n\t" +#else "ldrd r8, r9, [%[base], #56]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #88]\n\t" + "ldr r9, [%[base], #92]\n\t" +#else "ldrd r8, r9, [%[base], #88]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -1290,25 +2392,46 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r5, r5, r8\n\t" "eor r6, r6, r9\n\t" "add %[base], %[base], #0x60\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r7, #0x800000\n\t" + "lsl r7, r7, #8\n\t" + "add r7, r7, #0x0\n\t" +#else "mov r7, #0x80000000\n\t" +#endif "ror r7, r7, #24\n\t" "ror r7, r7, r10\n\t" "asr r7, r7, #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #24]\n\t" + "ldr r9, [%[base], #28]\n\t" +#else "ldrd r8, r9, [%[base], #24]\n\t" +#endif "eor r8, r8, r3\n\t" "eor r9, r9, r12\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor r3, r3, r8\n\t" "eor r12, r12, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #56]\n\t" + "ldr r9, [%[base], #60]\n\t" +#else "ldrd r8, r9, [%[base], #56]\n\t" +#endif "eor r8, r8, lr\n\t" "eor r9, r9, r4\n\t" "and r8, r8, r7\n\t" "and r9, r9, r7\n\t" "eor lr, lr, r8\n\t" "eor r4, r4, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[base], #88]\n\t" + "ldr r9, [%[base], #92]\n\t" +#else "ldrd r8, r9, [%[base], #88]\n\t" +#endif "eor r8, r8, r5\n\t" "eor r9, r9, r6\n\t" "and r8, r8, r7\n\t" @@ -1317,7 +2440,13 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r6, r6, r9\n\t" "sub %[base], %[base], #0x2a0\n\t" "mov r8, #-1\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r9, #0x7fffff\n\t" + "lsl r9, r9, #8\n\t" + "add r9, r9, #0xff\n\t" +#else "mov r9, #0x7fffffff\n\t" +#endif "rsbs r11, r11, #0\n\t" "sbcs r8, r8, r5\n\t" "sbc r9, r9, r6\n\t" @@ -1336,9 +2465,24 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r9, r9, r6\n\t" "and r9, r9, r10\n\t" "eor r6, r6, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r3, [%[r], #24]\n\t" + "str r12, [%[r], #28]\n\t" +#else "strd r3, r12, [%[r], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str lr, [%[r], #56]\n\t" + "str r4, [%[r], #60]\n\t" +#else "strd lr, r4, [%[r], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r5, [%[r], #88]\n\t" + "str r6, [%[r], #92]\n\t" +#else "strd r5, r6, [%[r], #88]\n\t" +#endif : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) : : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -1768,10 +2912,30 @@ void fe_mul(fe r_p, const fe a_p, const fe b_p) "str r4, [sp, #60]\n\t" /* Reduce */ /* Load bottom half */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp]\n\t" + "ldr r5, [sp, #4]\n\t" +#else "ldrd r4, r5, [sp]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #8]\n\t" + "ldr r7, [sp, #12]\n\t" +#else "ldrd r6, r7, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [sp, #16]\n\t" + "ldr r9, [sp, #20]\n\t" +#else "ldrd r8, r9, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [sp, #24]\n\t" + "ldr r11, [sp, #28]\n\t" +#else "ldrd r10, r11, [sp, #24]\n\t" +#endif "lsr r3, r11, #31\n\t" "and r11, r11, #0x7fffffff\n\t" "mov lr, #19\n\t" @@ -1868,10 +3032,30 @@ void fe_mul(fe r_p, const fe a_p, const fe b_p) "adcs r10, r10, #0\n\t" "adc r11, r11, #0\n\t" /* Store */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" +#else "strd r4, r5, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" +#else "strd r6, r7, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #16]\n\t" + "str r9, [%[r], #20]\n\t" +#else "strd r8, r9, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [%[r], #24]\n\t" + "str r11, [%[r], #28]\n\t" +#else "strd r10, r11, [%[r], #24]\n\t" +#endif "add sp, sp, #0x40\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : @@ -2193,10 +3377,30 @@ void fe_sq(fe r_p, const fe a_p) "str r4, [sp, #60]\n\t" /* Reduce */ /* Load bottom half */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp]\n\t" + "ldr r5, [sp, #4]\n\t" +#else "ldrd r4, r5, [sp]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #8]\n\t" + "ldr r7, [sp, #12]\n\t" +#else "ldrd r6, r7, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [sp, #16]\n\t" + "ldr r9, [sp, #20]\n\t" +#else "ldrd r8, r9, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [sp, #24]\n\t" + "ldr r11, [sp, #28]\n\t" +#else "ldrd r10, r11, [sp, #24]\n\t" +#endif "lsr r2, r11, #31\n\t" "and r11, r11, #0x7fffffff\n\t" "mov r12, #19\n\t" @@ -2293,10 +3497,30 @@ void fe_sq(fe r_p, const fe a_p) "adcs r10, r10, #0\n\t" "adc r11, r11, #0\n\t" /* Store */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" +#else "strd r4, r5, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" +#else "strd r6, r7, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #16]\n\t" + "str r9, [%[r], #20]\n\t" +#else "strd r8, r9, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [%[r], #24]\n\t" + "str r11, [%[r], #28]\n\t" +#else "strd r10, r11, [%[r], #24]\n\t" +#endif "add sp, sp, #0x40\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -2311,10 +3535,30 @@ void fe_mul121666(fe r_p, fe a_p) __asm__ __volatile__ ( /* Multiply by 121666 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" +#else "ldrd r2, r3, [%[a]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" +#else "ldrd r4, r5, [%[a], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[a], #20]\n\t" +#else "ldrd r6, r7, [%[a], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[a], #28]\n\t" +#else "ldrd r8, r9, [%[a], #24]\n\t" +#endif "movw lr, #0xdb42\n\t" "movt lr, #1\n\t" "umull r2, r10, r2, lr\n\t" @@ -2352,10 +3596,30 @@ void fe_mul121666(fe r_p, fe a_p) "adcs r7, r7, #0\n\t" "adcs r8, r8, #0\n\t" "adc r9, r9, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r2, [%[r]]\n\t" + "str r3, [%[r], #4]\n\t" +#else "strd r2, r3, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" +#else "strd r4, r5, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[r], #16]\n\t" + "str r7, [%[r], #20]\n\t" +#else "strd r6, r7, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #24]\n\t" + "str r9, [%[r], #28]\n\t" +#else "strd r8, r9, [%[r], #24]\n\t" +#endif : [r] "+r" (r), [a] "+r" (a) : : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10" @@ -2676,10 +3940,30 @@ void fe_sq2(fe r_p, const fe a_p) "str r4, [sp, #60]\n\t" /* Double and Reduce */ /* Load bottom half */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp]\n\t" + "ldr r5, [sp, #4]\n\t" +#else "ldrd r4, r5, [sp]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #8]\n\t" + "ldr r7, [sp, #12]\n\t" +#else "ldrd r6, r7, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [sp, #16]\n\t" + "ldr r9, [sp, #20]\n\t" +#else "ldrd r8, r9, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [sp, #24]\n\t" + "ldr r11, [sp, #28]\n\t" +#else "ldrd r10, r11, [sp, #24]\n\t" +#endif "lsr r2, r11, #30\n\t" "lsl r11, r11, #1\n\t" "orr r11, r11, r10, lsr #31\n\t" @@ -2791,10 +4075,30 @@ void fe_sq2(fe r_p, const fe a_p) "adcs r10, r10, #0\n\t" "adc r11, r11, #0\n\t" /* Store */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" +#else "strd r4, r5, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" +#else "strd r6, r7, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #16]\n\t" + "str r9, [%[r], #20]\n\t" +#else "strd r8, r9, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [%[r], #24]\n\t" + "str r11, [%[r], #28]\n\t" +#else "strd r10, r11, [%[r], #24]\n\t" +#endif "add sp, sp, #0x40\n\t" : [r] "+r" (r), [a] "+r" (a) : @@ -2980,32 +4284,132 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) /* Set one */ "mov r11, #1\n\t" "mov r10, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r11, [%[r]]\n\t" + "str r10, [%[r], #4]\n\t" +#else "strd r11, r10, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [%[r], #8]\n\t" + "str r10, [%[r], #12]\n\t" +#else "strd r10, r10, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [%[r], #16]\n\t" + "str r10, [%[r], #20]\n\t" +#else "strd r10, r10, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [%[r], #24]\n\t" + "str r10, [%[r], #28]\n\t" +#else "strd r10, r10, [%[r], #24]\n\t" +#endif /* Set zero */ "mov r10, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp]\n\t" + "str r10, [sp, #4]\n\t" +#else "strd r10, r10, [sp]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #8]\n\t" + "str r10, [sp, #12]\n\t" +#else "strd r10, r10, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #16]\n\t" + "str r10, [sp, #20]\n\t" +#else "strd r10, r10, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #24]\n\t" + "str r10, [sp, #28]\n\t" +#else "strd r10, r10, [sp, #24]\n\t" +#endif /* Set one */ "mov r11, #1\n\t" "mov r10, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r11, [sp, #32]\n\t" + "str r10, [sp, #36]\n\t" +#else "strd r11, r10, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #40]\n\t" + "str r10, [sp, #44]\n\t" +#else "strd r10, r10, [sp, #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #48]\n\t" + "str r10, [sp, #52]\n\t" +#else "strd r10, r10, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #56]\n\t" + "str r10, [sp, #60]\n\t" +#else "strd r10, r10, [sp, #56]\n\t" +#endif /* Copy */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" +#else "ldrd r4, r5, [%[a]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" +#else "ldrd r6, r7, [%[a], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #64]\n\t" + "str r5, [sp, #68]\n\t" +#else "strd r4, r5, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #72]\n\t" + "str r7, [sp, #76]\n\t" +#else "strd r6, r7, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" +#else "ldrd r4, r5, [%[a], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" +#else "ldrd r6, r7, [%[a], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #80]\n\t" + "str r5, [sp, #84]\n\t" +#else "strd r4, r5, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #88]\n\t" + "str r7, [sp, #92]\n\t" +#else "strd r6, r7, [sp, #88]\n\t" +#endif "mov %[n], #30\n\t" "str %[n], [sp, #180]\n\t" "mov %[a], #28\n\t" @@ -3026,8 +4430,18 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "ldr %[r], [sp, #160]\n\t" /* Conditional Swap */ "neg %[n], %[n]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r]]\n\t" + "ldr r5, [%[r], #4]\n\t" +#else "ldrd r4, r5, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #64]\n\t" + "ldr r7, [sp, #68]\n\t" +#else "ldrd r6, r7, [sp, #64]\n\t" +#endif "eor r8, r4, r6\n\t" "eor r9, r5, r7\n\t" "and r8, r8, %[n]\n\t" @@ -3036,10 +4450,30 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "eor r5, r5, r9\n\t" "eor r6, r6, r8\n\t" "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" +#else "strd r4, r5, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #64]\n\t" + "str r7, [sp, #68]\n\t" +#else "strd r6, r7, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r], #8]\n\t" + "ldr r5, [%[r], #12]\n\t" +#else "ldrd r4, r5, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #72]\n\t" + "ldr r7, [sp, #76]\n\t" +#else "ldrd r6, r7, [sp, #72]\n\t" +#endif "eor r8, r4, r6\n\t" "eor r9, r5, r7\n\t" "and r8, r8, %[n]\n\t" @@ -3048,10 +4482,30 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "eor r5, r5, r9\n\t" "eor r6, r6, r8\n\t" "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" +#else "strd r4, r5, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #72]\n\t" + "str r7, [sp, #76]\n\t" +#else "strd r6, r7, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r], #16]\n\t" + "ldr r5, [%[r], #20]\n\t" +#else "ldrd r4, r5, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #80]\n\t" + "ldr r7, [sp, #84]\n\t" +#else "ldrd r6, r7, [sp, #80]\n\t" +#endif "eor r8, r4, r6\n\t" "eor r9, r5, r7\n\t" "and r8, r8, %[n]\n\t" @@ -3060,10 +4514,30 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "eor r5, r5, r9\n\t" "eor r6, r6, r8\n\t" "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" +#else "strd r4, r5, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #80]\n\t" + "str r7, [sp, #84]\n\t" +#else "strd r6, r7, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r], #24]\n\t" + "ldr r5, [%[r], #28]\n\t" +#else "ldrd r4, r5, [%[r], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #88]\n\t" + "ldr r7, [sp, #92]\n\t" +#else "ldrd r6, r7, [sp, #88]\n\t" +#endif "eor r8, r4, r6\n\t" "eor r9, r5, r7\n\t" "and r8, r8, %[n]\n\t" @@ -3072,13 +4546,33 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "eor r5, r5, r9\n\t" "eor r6, r6, r8\n\t" "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" +#else "strd r4, r5, [%[r], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #88]\n\t" + "str r7, [sp, #92]\n\t" +#else "strd r6, r7, [sp, #88]\n\t" +#endif "ldr %[n], [sp, #172]\n\t" /* Conditional Swap */ "neg %[n], %[n]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp]\n\t" + "ldr r5, [sp, #4]\n\t" +#else "ldrd r4, r5, [sp]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #32]\n\t" + "ldr r7, [sp, #36]\n\t" +#else "ldrd r6, r7, [sp, #32]\n\t" +#endif "eor r8, r4, r6\n\t" "eor r9, r5, r7\n\t" "and r8, r8, %[n]\n\t" @@ -3087,10 +4581,30 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "eor r5, r5, r9\n\t" "eor r6, r6, r8\n\t" "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp]\n\t" + "str r5, [sp, #4]\n\t" +#else "strd r4, r5, [sp]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #32]\n\t" + "str r7, [sp, #36]\n\t" +#else "strd r6, r7, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #8]\n\t" + "ldr r5, [sp, #12]\n\t" +#else "ldrd r4, r5, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #40]\n\t" + "ldr r7, [sp, #44]\n\t" +#else "ldrd r6, r7, [sp, #40]\n\t" +#endif "eor r8, r4, r6\n\t" "eor r9, r5, r7\n\t" "and r8, r8, %[n]\n\t" @@ -3099,10 +4613,30 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "eor r5, r5, r9\n\t" "eor r6, r6, r8\n\t" "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #8]\n\t" + "str r5, [sp, #12]\n\t" +#else "strd r4, r5, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #40]\n\t" + "str r7, [sp, #44]\n\t" +#else "strd r6, r7, [sp, #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #16]\n\t" + "ldr r5, [sp, #20]\n\t" +#else "ldrd r4, r5, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #48]\n\t" + "ldr r7, [sp, #52]\n\t" +#else "ldrd r6, r7, [sp, #48]\n\t" +#endif "eor r8, r4, r6\n\t" "eor r9, r5, r7\n\t" "and r8, r8, %[n]\n\t" @@ -3111,10 +4645,30 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "eor r5, r5, r9\n\t" "eor r6, r6, r8\n\t" "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #16]\n\t" + "str r5, [sp, #20]\n\t" +#else "strd r4, r5, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #48]\n\t" + "str r7, [sp, #52]\n\t" +#else "strd r6, r7, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #24]\n\t" + "ldr r5, [sp, #28]\n\t" +#else "ldrd r4, r5, [sp, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #56]\n\t" + "ldr r7, [sp, #60]\n\t" +#else "ldrd r6, r7, [sp, #56]\n\t" +#endif "eor r8, r4, r6\n\t" "eor r9, r5, r7\n\t" "and r8, r8, %[n]\n\t" @@ -3123,60 +4677,140 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "eor r5, r5, r9\n\t" "eor r6, r6, r8\n\t" "eor r7, r7, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #24]\n\t" + "str r5, [sp, #28]\n\t" +#else "strd r4, r5, [sp, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #56]\n\t" + "str r7, [sp, #60]\n\t" +#else "strd r6, r7, [sp, #56]\n\t" +#endif "ldr %[n], [sp, #184]\n\t" "str %[n], [sp, #172]\n\t" /* Add-Sub */ /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r]]\n\t" + "ldr r5, [%[r], #4]\n\t" +#else "ldrd r4, r5, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp]\n\t" + "ldr r7, [sp, #4]\n\t" +#else "ldrd r6, r7, [sp]\n\t" +#endif "adds r8, r4, r6\n\t" "mov r3, #0\n\t" "adcs r9, r5, r7\n\t" "adc r3, r3, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r]]\n\t" + "str r9, [%[r], #4]\n\t" +#else "strd r8, r9, [%[r]]\n\t" +#endif /* Sub */ "subs r10, r4, r6\n\t" "mov r12, #0\n\t" "sbcs r11, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #128]\n\t" + "str r11, [sp, #132]\n\t" +#else "strd r10, r11, [sp, #128]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r], #8]\n\t" + "ldr r5, [%[r], #12]\n\t" +#else "ldrd r4, r5, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #8]\n\t" + "ldr r7, [sp, #12]\n\t" +#else "ldrd r6, r7, [sp, #8]\n\t" +#endif "adds r3, r3, #-1\n\t" "adcs r8, r4, r6\n\t" "mov r3, #0\n\t" "adcs r9, r5, r7\n\t" "adc r3, r3, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #8]\n\t" + "str r9, [%[r], #12]\n\t" +#else "strd r8, r9, [%[r], #8]\n\t" +#endif /* Sub */ "adds r12, r12, #-1\n\t" "sbcs r10, r4, r6\n\t" "mov r12, #0\n\t" "sbcs r11, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #136]\n\t" + "str r11, [sp, #140]\n\t" +#else "strd r10, r11, [sp, #136]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r], #16]\n\t" + "ldr r5, [%[r], #20]\n\t" +#else "ldrd r4, r5, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #16]\n\t" + "ldr r7, [sp, #20]\n\t" +#else "ldrd r6, r7, [sp, #16]\n\t" +#endif "adds r3, r3, #-1\n\t" "adcs r8, r4, r6\n\t" "mov r3, #0\n\t" "adcs r9, r5, r7\n\t" "adc r3, r3, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #16]\n\t" + "str r9, [%[r], #20]\n\t" +#else "strd r8, r9, [%[r], #16]\n\t" +#endif /* Sub */ "adds r12, r12, #-1\n\t" "sbcs r10, r4, r6\n\t" "mov r12, #0\n\t" "sbcs r11, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #144]\n\t" + "str r11, [sp, #148]\n\t" +#else "strd r10, r11, [sp, #144]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r], #24]\n\t" + "ldr r5, [%[r], #28]\n\t" +#else "ldrd r4, r5, [%[r], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #24]\n\t" + "ldr r7, [sp, #28]\n\t" +#else "ldrd r6, r7, [sp, #24]\n\t" +#endif "adds r3, r3, #-1\n\t" "adcs r8, r4, r6\n\t" "adc r9, r5, r7\n\t" @@ -3190,92 +4824,232 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "and r3, %[a], r3\n\t" "and r12, %[a], #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r]]\n\t" + "ldr r5, [%[r], #4]\n\t" +#else "ldrd r4, r5, [%[r]]\n\t" +#endif "subs r4, r4, r3\n\t" "sbcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" +#else "strd r4, r5, [%[r]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r], #8]\n\t" + "ldr r5, [%[r], #12]\n\t" +#else "ldrd r4, r5, [%[r], #8]\n\t" +#endif "sbcs r4, r4, %[a]\n\t" "sbcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" +#else "strd r4, r5, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r], #16]\n\t" + "ldr r5, [%[r], #20]\n\t" +#else "ldrd r4, r5, [%[r], #16]\n\t" +#endif "sbcs r4, r4, %[a]\n\t" "sbcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" +#else "strd r4, r5, [%[r], #16]\n\t" +#endif "sbcs r8, r8, %[a]\n\t" "sbc r9, r9, r12\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #24]\n\t" + "str r9, [%[r], #28]\n\t" +#else "strd r8, r9, [%[r], #24]\n\t" +#endif "mov r3, #-19\n\t" "asr %[a], r11, #31\n\t" /* Mask the modulus */ "and r3, %[a], r3\n\t" "and r12, %[a], #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #128]\n\t" + "ldr r5, [sp, #132]\n\t" +#else "ldrd r4, r5, [sp, #128]\n\t" +#endif "adds r4, r4, r3\n\t" "adcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #128]\n\t" + "str r5, [sp, #132]\n\t" +#else "strd r4, r5, [sp, #128]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #136]\n\t" + "ldr r5, [sp, #140]\n\t" +#else "ldrd r4, r5, [sp, #136]\n\t" +#endif "adcs r4, r4, %[a]\n\t" "adcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #136]\n\t" + "str r5, [sp, #140]\n\t" +#else "strd r4, r5, [sp, #136]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #144]\n\t" + "ldr r5, [sp, #148]\n\t" +#else "ldrd r4, r5, [sp, #144]\n\t" +#endif "adcs r4, r4, %[a]\n\t" "adcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #144]\n\t" + "str r5, [sp, #148]\n\t" +#else "strd r4, r5, [sp, #144]\n\t" +#endif "adcs r10, r10, %[a]\n\t" "adc r11, r11, r12\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #152]\n\t" + "str r11, [sp, #156]\n\t" +#else "strd r10, r11, [sp, #152]\n\t" +#endif /* Add-Sub */ /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #64]\n\t" + "ldr r5, [sp, #68]\n\t" +#else "ldrd r4, r5, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #32]\n\t" + "ldr r7, [sp, #36]\n\t" +#else "ldrd r6, r7, [sp, #32]\n\t" +#endif "adds r8, r4, r6\n\t" "mov r3, #0\n\t" "adcs r9, r5, r7\n\t" "adc r3, r3, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp]\n\t" + "str r9, [sp, #4]\n\t" +#else "strd r8, r9, [sp]\n\t" +#endif /* Sub */ "subs r10, r4, r6\n\t" "mov r12, #0\n\t" "sbcs r11, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #96]\n\t" + "str r11, [sp, #100]\n\t" +#else "strd r10, r11, [sp, #96]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #72]\n\t" + "ldr r5, [sp, #76]\n\t" +#else "ldrd r4, r5, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #40]\n\t" + "ldr r7, [sp, #44]\n\t" +#else "ldrd r6, r7, [sp, #40]\n\t" +#endif "adds r3, r3, #-1\n\t" "adcs r8, r4, r6\n\t" "mov r3, #0\n\t" "adcs r9, r5, r7\n\t" "adc r3, r3, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #8]\n\t" + "str r9, [sp, #12]\n\t" +#else "strd r8, r9, [sp, #8]\n\t" +#endif /* Sub */ "adds r12, r12, #-1\n\t" "sbcs r10, r4, r6\n\t" "mov r12, #0\n\t" "sbcs r11, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #104]\n\t" + "str r11, [sp, #108]\n\t" +#else "strd r10, r11, [sp, #104]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #80]\n\t" + "ldr r5, [sp, #84]\n\t" +#else "ldrd r4, r5, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #48]\n\t" + "ldr r7, [sp, #52]\n\t" +#else "ldrd r6, r7, [sp, #48]\n\t" +#endif "adds r3, r3, #-1\n\t" "adcs r8, r4, r6\n\t" "mov r3, #0\n\t" "adcs r9, r5, r7\n\t" "adc r3, r3, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #16]\n\t" + "str r9, [sp, #20]\n\t" +#else "strd r8, r9, [sp, #16]\n\t" +#endif /* Sub */ "adds r12, r12, #-1\n\t" "sbcs r10, r4, r6\n\t" "mov r12, #0\n\t" "sbcs r11, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #112]\n\t" + "str r11, [sp, #116]\n\t" +#else "strd r10, r11, [sp, #112]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #88]\n\t" + "ldr r5, [sp, #92]\n\t" +#else "ldrd r4, r5, [sp, #88]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #56]\n\t" + "ldr r7, [sp, #60]\n\t" +#else "ldrd r6, r7, [sp, #56]\n\t" +#endif "adds r3, r3, #-1\n\t" "adcs r8, r4, r6\n\t" "adc r9, r5, r7\n\t" @@ -3289,42 +5063,112 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "and r3, %[a], r3\n\t" "and r12, %[a], #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp]\n\t" + "ldr r5, [sp, #4]\n\t" +#else "ldrd r4, r5, [sp]\n\t" +#endif "subs r4, r4, r3\n\t" "sbcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp]\n\t" + "str r5, [sp, #4]\n\t" +#else "strd r4, r5, [sp]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #8]\n\t" + "ldr r5, [sp, #12]\n\t" +#else "ldrd r4, r5, [sp, #8]\n\t" +#endif "sbcs r4, r4, %[a]\n\t" "sbcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #8]\n\t" + "str r5, [sp, #12]\n\t" +#else "strd r4, r5, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #16]\n\t" + "ldr r5, [sp, #20]\n\t" +#else "ldrd r4, r5, [sp, #16]\n\t" +#endif "sbcs r4, r4, %[a]\n\t" "sbcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #16]\n\t" + "str r5, [sp, #20]\n\t" +#else "strd r4, r5, [sp, #16]\n\t" +#endif "sbcs r8, r8, %[a]\n\t" "sbc r9, r9, r12\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #24]\n\t" + "str r9, [sp, #28]\n\t" +#else "strd r8, r9, [sp, #24]\n\t" +#endif "mov r3, #-19\n\t" "asr %[a], r11, #31\n\t" /* Mask the modulus */ "and r3, %[a], r3\n\t" "and r12, %[a], #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #96]\n\t" + "ldr r5, [sp, #100]\n\t" +#else "ldrd r4, r5, [sp, #96]\n\t" +#endif "adds r4, r4, r3\n\t" "adcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #96]\n\t" + "str r5, [sp, #100]\n\t" +#else "strd r4, r5, [sp, #96]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #104]\n\t" + "ldr r5, [sp, #108]\n\t" +#else "ldrd r4, r5, [sp, #104]\n\t" +#endif "adcs r4, r4, %[a]\n\t" "adcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #104]\n\t" + "str r5, [sp, #108]\n\t" +#else "strd r4, r5, [sp, #104]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #112]\n\t" + "ldr r5, [sp, #116]\n\t" +#else "ldrd r4, r5, [sp, #112]\n\t" +#endif "adcs r4, r4, %[a]\n\t" "adcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #112]\n\t" + "str r5, [sp, #116]\n\t" +#else "strd r4, r5, [sp, #112]\n\t" +#endif "adcs r10, r10, %[a]\n\t" "adc r11, r11, r12\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #120]\n\t" + "str r11, [sp, #124]\n\t" +#else "strd r10, r11, [sp, #120]\n\t" +#endif "ldr r2, [sp, #160]\n\t" "add r1, sp, #0x60\n\t" "add r0, sp, #32\n\t" @@ -3341,54 +5185,124 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "bl fe_sq\n\t" /* Add-Sub */ /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #32]\n\t" + "ldr r5, [sp, #36]\n\t" +#else "ldrd r4, r5, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp]\n\t" + "ldr r7, [sp, #4]\n\t" +#else "ldrd r6, r7, [sp]\n\t" +#endif "adds r8, r4, r6\n\t" "mov r3, #0\n\t" "adcs r9, r5, r7\n\t" "adc r3, r3, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #64]\n\t" + "str r9, [sp, #68]\n\t" +#else "strd r8, r9, [sp, #64]\n\t" +#endif /* Sub */ "subs r10, r4, r6\n\t" "mov r12, #0\n\t" "sbcs r11, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp]\n\t" + "str r11, [sp, #4]\n\t" +#else "strd r10, r11, [sp]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #40]\n\t" + "ldr r5, [sp, #44]\n\t" +#else "ldrd r4, r5, [sp, #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #8]\n\t" + "ldr r7, [sp, #12]\n\t" +#else "ldrd r6, r7, [sp, #8]\n\t" +#endif "adds r3, r3, #-1\n\t" "adcs r8, r4, r6\n\t" "mov r3, #0\n\t" "adcs r9, r5, r7\n\t" "adc r3, r3, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #72]\n\t" + "str r9, [sp, #76]\n\t" +#else "strd r8, r9, [sp, #72]\n\t" +#endif /* Sub */ "adds r12, r12, #-1\n\t" "sbcs r10, r4, r6\n\t" "mov r12, #0\n\t" "sbcs r11, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #8]\n\t" + "str r11, [sp, #12]\n\t" +#else "strd r10, r11, [sp, #8]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #48]\n\t" + "ldr r5, [sp, #52]\n\t" +#else "ldrd r4, r5, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #16]\n\t" + "ldr r7, [sp, #20]\n\t" +#else "ldrd r6, r7, [sp, #16]\n\t" +#endif "adds r3, r3, #-1\n\t" "adcs r8, r4, r6\n\t" "mov r3, #0\n\t" "adcs r9, r5, r7\n\t" "adc r3, r3, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #80]\n\t" + "str r9, [sp, #84]\n\t" +#else "strd r8, r9, [sp, #80]\n\t" +#endif /* Sub */ "adds r12, r12, #-1\n\t" "sbcs r10, r4, r6\n\t" "mov r12, #0\n\t" "sbcs r11, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #16]\n\t" + "str r11, [sp, #20]\n\t" +#else "strd r10, r11, [sp, #16]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #56]\n\t" + "ldr r5, [sp, #60]\n\t" +#else "ldrd r4, r5, [sp, #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #24]\n\t" + "ldr r7, [sp, #28]\n\t" +#else "ldrd r6, r7, [sp, #24]\n\t" +#endif "adds r3, r3, #-1\n\t" "adcs r8, r4, r6\n\t" "adc r9, r5, r7\n\t" @@ -3402,61 +5316,181 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "and r3, %[a], r3\n\t" "and r12, %[a], #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #64]\n\t" + "ldr r5, [sp, #68]\n\t" +#else "ldrd r4, r5, [sp, #64]\n\t" +#endif "subs r4, r4, r3\n\t" "sbcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #64]\n\t" + "str r5, [sp, #68]\n\t" +#else "strd r4, r5, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #72]\n\t" + "ldr r5, [sp, #76]\n\t" +#else "ldrd r4, r5, [sp, #72]\n\t" +#endif "sbcs r4, r4, %[a]\n\t" "sbcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #72]\n\t" + "str r5, [sp, #76]\n\t" +#else "strd r4, r5, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #80]\n\t" + "ldr r5, [sp, #84]\n\t" +#else "ldrd r4, r5, [sp, #80]\n\t" +#endif "sbcs r4, r4, %[a]\n\t" "sbcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #80]\n\t" + "str r5, [sp, #84]\n\t" +#else "strd r4, r5, [sp, #80]\n\t" +#endif "sbcs r8, r8, %[a]\n\t" "sbc r9, r9, r12\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #88]\n\t" + "str r9, [sp, #92]\n\t" +#else "strd r8, r9, [sp, #88]\n\t" +#endif "mov r3, #-19\n\t" "asr %[a], r11, #31\n\t" /* Mask the modulus */ "and r3, %[a], r3\n\t" "and r12, %[a], #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp]\n\t" + "ldr r5, [sp, #4]\n\t" +#else "ldrd r4, r5, [sp]\n\t" +#endif "adds r4, r4, r3\n\t" "adcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp]\n\t" + "str r5, [sp, #4]\n\t" +#else "strd r4, r5, [sp]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #8]\n\t" + "ldr r5, [sp, #12]\n\t" +#else "ldrd r4, r5, [sp, #8]\n\t" +#endif "adcs r4, r4, %[a]\n\t" "adcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #8]\n\t" + "str r5, [sp, #12]\n\t" +#else "strd r4, r5, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #16]\n\t" + "ldr r5, [sp, #20]\n\t" +#else "ldrd r4, r5, [sp, #16]\n\t" +#endif "adcs r4, r4, %[a]\n\t" "adcs r5, r5, %[a]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #16]\n\t" + "str r5, [sp, #20]\n\t" +#else "strd r4, r5, [sp, #16]\n\t" +#endif "adcs r10, r10, %[a]\n\t" "adc r11, r11, r12\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #24]\n\t" + "str r11, [sp, #28]\n\t" +#else "strd r10, r11, [sp, #24]\n\t" +#endif "add r2, sp, #0x60\n\t" "add r1, sp, #0x80\n\t" "ldr r0, [sp, #160]\n\t" "bl fe_mul\n\t" /* Sub */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #128]\n\t" + "ldr r5, [sp, #132]\n\t" +#else "ldrd r4, r5, [sp, #128]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #136]\n\t" + "ldr r7, [sp, #140]\n\t" +#else "ldrd r6, r7, [sp, #136]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [sp, #96]\n\t" + "ldr r9, [sp, #100]\n\t" +#else "ldrd r8, r9, [sp, #96]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [sp, #104]\n\t" + "ldr r11, [sp, #108]\n\t" +#else "ldrd r10, r11, [sp, #104]\n\t" +#endif "subs r8, r4, r8\n\t" "sbcs r9, r5, r9\n\t" "sbcs r10, r6, r10\n\t" "sbcs r11, r7, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #128]\n\t" + "str r9, [sp, #132]\n\t" +#else "strd r8, r9, [sp, #128]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #136]\n\t" + "str r11, [sp, #140]\n\t" +#else "strd r10, r11, [sp, #136]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #144]\n\t" + "ldr r5, [sp, #148]\n\t" +#else "ldrd r4, r5, [sp, #144]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #152]\n\t" + "ldr r7, [sp, #156]\n\t" +#else "ldrd r6, r7, [sp, #152]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [sp, #112]\n\t" + "ldr r9, [sp, #116]\n\t" +#else "ldrd r8, r9, [sp, #112]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [sp, #120]\n\t" + "ldr r11, [sp, #124]\n\t" +#else "ldrd r10, r11, [sp, #120]\n\t" +#endif "sbcs r8, r4, r8\n\t" "sbcs r9, r5, r9\n\t" "sbcs r10, r6, r10\n\t" @@ -3467,8 +5501,18 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "and r3, %[a], r3\n\t" "and r12, %[a], #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #128]\n\t" + "ldr r5, [sp, #132]\n\t" +#else "ldrd r4, r5, [sp, #128]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #136]\n\t" + "ldr r7, [sp, #140]\n\t" +#else "ldrd r6, r7, [sp, #136]\n\t" +#endif "adds r4, r4, r3\n\t" "adcs r5, r5, %[a]\n\t" "adcs r6, r6, %[a]\n\t" @@ -3477,18 +5521,58 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "adcs r9, r9, %[a]\n\t" "adcs r10, r10, %[a]\n\t" "adc r11, r11, r12\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #128]\n\t" + "str r5, [sp, #132]\n\t" +#else "strd r4, r5, [sp, #128]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #136]\n\t" + "str r7, [sp, #140]\n\t" +#else "strd r6, r7, [sp, #136]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #144]\n\t" + "str r9, [sp, #148]\n\t" +#else "strd r8, r9, [sp, #144]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #152]\n\t" + "str r11, [sp, #156]\n\t" +#else "strd r10, r11, [sp, #152]\n\t" +#endif "add r1, sp, #0\n\t" "add r0, sp, #0\n\t" "bl fe_sq\n\t" /* Multiply by 121666 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #128]\n\t" + "ldr r5, [sp, #132]\n\t" +#else "ldrd r4, r5, [sp, #128]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #136]\n\t" + "ldr r7, [sp, #140]\n\t" +#else "ldrd r6, r7, [sp, #136]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [sp, #144]\n\t" + "ldr r9, [sp, #148]\n\t" +#else "ldrd r8, r9, [sp, #144]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [sp, #152]\n\t" + "ldr r11, [sp, #156]\n\t" +#else "ldrd r10, r11, [sp, #152]\n\t" +#endif "movw r12, #0xdb42\n\t" "movt r12, #1\n\t" "umull r4, %[a], r4, r12\n\t" @@ -3526,28 +5610,98 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "adcs r9, r9, #0\n\t" "adcs r10, r10, #0\n\t" "adc r11, r11, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #32]\n\t" + "str r5, [sp, #36]\n\t" +#else "strd r4, r5, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #40]\n\t" + "str r7, [sp, #44]\n\t" +#else "strd r6, r7, [sp, #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #48]\n\t" + "str r9, [sp, #52]\n\t" +#else "strd r8, r9, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #56]\n\t" + "str r11, [sp, #60]\n\t" +#else "strd r10, r11, [sp, #56]\n\t" +#endif "add r1, sp, #0x40\n\t" "add r0, sp, #0x40\n\t" "bl fe_sq\n\t" /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #96]\n\t" + "ldr r5, [sp, #100]\n\t" +#else "ldrd r4, r5, [sp, #96]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #104]\n\t" + "ldr r7, [sp, #108]\n\t" +#else "ldrd r6, r7, [sp, #104]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [sp, #32]\n\t" + "ldr r9, [sp, #36]\n\t" +#else "ldrd r8, r9, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [sp, #40]\n\t" + "ldr r11, [sp, #44]\n\t" +#else "ldrd r10, r11, [sp, #40]\n\t" +#endif "adds r8, r4, r8\n\t" "adcs r9, r5, r9\n\t" "adcs r10, r6, r10\n\t" "adcs r11, r7, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #96]\n\t" + "str r9, [sp, #100]\n\t" +#else "strd r8, r9, [sp, #96]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #104]\n\t" + "str r11, [sp, #108]\n\t" +#else "strd r10, r11, [sp, #104]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #112]\n\t" + "ldr r5, [sp, #116]\n\t" +#else "ldrd r4, r5, [sp, #112]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #120]\n\t" + "ldr r7, [sp, #124]\n\t" +#else "ldrd r6, r7, [sp, #120]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [sp, #48]\n\t" + "ldr r9, [sp, #52]\n\t" +#else "ldrd r8, r9, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [sp, #56]\n\t" + "ldr r11, [sp, #60]\n\t" +#else "ldrd r10, r11, [sp, #56]\n\t" +#endif "adcs r8, r4, r8\n\t" "adcs r9, r5, r9\n\t" "adcs r10, r6, r10\n\t" @@ -3558,8 +5712,18 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "and r3, %[a], r3\n\t" "and r12, %[a], #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [sp, #96]\n\t" + "ldr r5, [sp, #100]\n\t" +#else "ldrd r4, r5, [sp, #96]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #104]\n\t" + "ldr r7, [sp, #108]\n\t" +#else "ldrd r6, r7, [sp, #104]\n\t" +#endif "subs r4, r4, r3\n\t" "sbcs r5, r5, %[a]\n\t" "sbcs r6, r6, %[a]\n\t" @@ -3568,10 +5732,30 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "sbcs r9, r9, %[a]\n\t" "sbcs r10, r10, %[a]\n\t" "sbc r11, r11, r12\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #96]\n\t" + "str r5, [sp, #100]\n\t" +#else "strd r4, r5, [sp, #96]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #104]\n\t" + "str r7, [sp, #108]\n\t" +#else "strd r6, r7, [sp, #104]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #112]\n\t" + "str r9, [sp, #116]\n\t" +#else "strd r8, r9, [sp, #112]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [sp, #120]\n\t" + "str r11, [sp, #124]\n\t" +#else "strd r10, r11, [sp, #120]\n\t" +#endif "add r2, sp, #0\n\t" "ldr r1, [sp, #168]\n\t" "add r0, sp, #32\n\t" @@ -4017,20 +6201,70 @@ void fe_ge_dbl(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "ldr r1, [sp, #52]\n\t" "ldr r2, [sp, #56]\n\t" /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2]\n\t" + "ldr r9, [r2, #4]\n\t" +#else "ldrd r8, r9, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #8]\n\t" + "ldr r11, [r2, #12]\n\t" +#else "ldrd r10, r11, [r2, #8]\n\t" +#endif "adds r8, %[rt], r8\n\t" "adcs r9, r5, r9\n\t" "adcs r10, r6, r10\n\t" "adcs r11, r7, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #8]\n\t" + "str r11, [r0, #12]\n\t" +#else "strd r10, r11, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #24]\n\t" + "ldr r7, [r1, #28]\n\t" +#else "ldrd r6, r7, [r1, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2, #16]\n\t" + "ldr r9, [r2, #20]\n\t" +#else "ldrd r8, r9, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #24]\n\t" + "ldr r11, [r2, #28]\n\t" +#else "ldrd r10, r11, [r2, #24]\n\t" +#endif "adcs r8, %[rt], r8\n\t" "adcs r9, r5, r9\n\t" "adcs r10, r6, r10\n\t" @@ -4041,8 +6275,18 @@ void fe_ge_dbl(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "subs %[rt], %[rt], r12\n\t" "sbcs r5, r5, r4\n\t" "sbcs r6, r6, r4\n\t" @@ -4051,10 +6295,30 @@ void fe_ge_dbl(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "sbcs r9, r9, r4\n\t" "sbcs r10, r10, r4\n\t" "sbc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "ldr r1, [sp, #4]\n\t" "ldr r0, [sp, #12]\n\t" "bl fe_sq\n\t" @@ -4063,54 +6327,124 @@ void fe_ge_dbl(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "ldr r2, [sp]\n\t" /* Add-Sub */ /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r2]\n\t" + "ldr r7, [r2, #4]\n\t" +#else "ldrd r6, r7, [r2]\n\t" +#endif "adds r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif /* Sub */ "subs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1]\n\t" + "str r11, [r1, #4]\n\t" +#else "strd r10, r11, [r1]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #8]\n\t" + "ldr r5, [r1, #12]\n\t" +#else "ldrd %[rt], r5, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r2, #8]\n\t" + "ldr r7, [r2, #12]\n\t" +#else "ldrd r6, r7, [r2, #8]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #8]\n\t" + "str r9, [r0, #12]\n\t" +#else "strd r8, r9, [r0, #8]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #8]\n\t" + "str r11, [r1, #12]\n\t" +#else "strd r10, r11, [r1, #8]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r2, #16]\n\t" + "ldr r7, [r2, #20]\n\t" +#else "ldrd r6, r7, [r2, #16]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #16]\n\t" + "str r11, [r1, #20]\n\t" +#else "strd r10, r11, [r1, #16]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #24]\n\t" + "ldr r5, [r1, #28]\n\t" +#else "ldrd %[rt], r5, [r1, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r2, #24]\n\t" + "ldr r7, [r2, #28]\n\t" +#else "ldrd r6, r7, [r2, #24]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "adc r9, r5, r7\n\t" @@ -4124,60 +6458,180 @@ void fe_ge_dbl(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif "subs %[rt], %[rt], r12\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #8]\n\t" + "ldr r5, [r0, #12]\n\t" +#else "ldrd %[rt], r5, [r0, #8]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #8]\n\t" + "str r5, [r0, #12]\n\t" +#else "strd %[rt], r5, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #16]\n\t" + "ldr r5, [r0, #20]\n\t" +#else "ldrd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #16]\n\t" + "str r5, [r0, #20]\n\t" +#else "strd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs r8, r8, r4\n\t" "sbc r9, r9, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #24]\n\t" + "str r9, [r0, #28]\n\t" +#else "strd r8, r9, [r0, #24]\n\t" +#endif "mov r12, #-19\n\t" "asr r4, r11, #31\n\t" /* Mask the modulus */ "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1]\n\t" + "str r5, [r1, #4]\n\t" +#else "strd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #8]\n\t" + "ldr r5, [r1, #12]\n\t" +#else "ldrd %[rt], r5, [r1, #8]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #8]\n\t" + "str r5, [r1, #12]\n\t" +#else "strd %[rt], r5, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #16]\n\t" + "str r5, [r1, #20]\n\t" +#else "strd %[rt], r5, [r1, #16]\n\t" +#endif "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #24]\n\t" + "str r11, [r1, #28]\n\t" +#else "strd r10, r11, [r1, #24]\n\t" +#endif "ldr r0, [sp]\n\t" "ldr r1, [sp, #12]\n\t" "ldr r2, [sp, #4]\n\t" /* Sub */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2]\n\t" + "ldr r9, [r2, #4]\n\t" +#else "ldrd r8, r9, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #8]\n\t" + "ldr r11, [r2, #12]\n\t" +#else "ldrd r10, r11, [r2, #8]\n\t" +#endif "subs r8, %[rt], r8\n\t" "sbcs r9, r5, r9\n\t" "sbcs r10, r6, r10\n\t" "sbcs r11, r7, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #8]\n\t" + "str r11, [r0, #12]\n\t" +#else "strd r10, r11, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #24]\n\t" + "ldr r7, [r1, #28]\n\t" +#else "ldrd r6, r7, [r1, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2, #16]\n\t" + "ldr r9, [r2, #20]\n\t" +#else "ldrd r8, r9, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #24]\n\t" + "ldr r11, [r2, #28]\n\t" +#else "ldrd r10, r11, [r2, #24]\n\t" +#endif "sbcs r8, %[rt], r8\n\t" "sbcs r9, r5, r9\n\t" "sbcs r10, r6, r10\n\t" @@ -4188,8 +6642,18 @@ void fe_ge_dbl(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" "adcs r6, r6, r4\n\t" @@ -4198,30 +6662,100 @@ void fe_ge_dbl(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "adcs r9, r9, r4\n\t" "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "ldr r1, [sp, #60]\n\t" "ldr r0, [sp, #12]\n\t" "bl fe_sq2\n\t" "ldr r0, [sp, #12]\n\t" "ldr r1, [sp, #8]\n\t" /* Sub */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r1]\n\t" + "ldr r9, [r1, #4]\n\t" +#else "ldrd r8, r9, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r1, #8]\n\t" + "ldr r11, [r1, #12]\n\t" +#else "ldrd r10, r11, [r1, #8]\n\t" +#endif "subs r8, %[rt], r8\n\t" "sbcs r9, r5, r9\n\t" "sbcs r10, r6, r10\n\t" "sbcs r11, r7, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #8]\n\t" + "str r11, [r0, #12]\n\t" +#else "strd r10, r11, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #16]\n\t" + "ldr r5, [r0, #20]\n\t" +#else "ldrd %[rt], r5, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #24]\n\t" + "ldr r7, [r0, #28]\n\t" +#else "ldrd r6, r7, [r0, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r1, #16]\n\t" + "ldr r9, [r1, #20]\n\t" +#else "ldrd r8, r9, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r1, #24]\n\t" + "ldr r11, [r1, #28]\n\t" +#else "ldrd r10, r11, [r1, #24]\n\t" +#endif "sbcs r8, %[rt], r8\n\t" "sbcs r9, r5, r9\n\t" "sbcs r10, r6, r10\n\t" @@ -4232,8 +6766,18 @@ void fe_ge_dbl(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" "adcs r6, r6, r4\n\t" @@ -4242,10 +6786,30 @@ void fe_ge_dbl(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "adcs r9, r9, r4\n\t" "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "add sp, sp, #16\n\t" : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt) : @@ -4280,20 +6844,70 @@ void fe_ge_madd(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "ldr r1, [sp, #72]\n\t" "ldr r2, [sp, #68]\n\t" /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2]\n\t" + "ldr r9, [r2, #4]\n\t" +#else "ldrd r8, r9, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #8]\n\t" + "ldr r11, [r2, #12]\n\t" +#else "ldrd r10, r11, [r2, #8]\n\t" +#endif "adds r8, %[rt], r8\n\t" "adcs r9, r5, r9\n\t" "adcs r10, r6, r10\n\t" "adcs r11, r7, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #8]\n\t" + "str r11, [r0, #12]\n\t" +#else "strd r10, r11, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #24]\n\t" + "ldr r7, [r1, #28]\n\t" +#else "ldrd r6, r7, [r1, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2, #16]\n\t" + "ldr r9, [r2, #20]\n\t" +#else "ldrd r8, r9, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #24]\n\t" + "ldr r11, [r2, #28]\n\t" +#else "ldrd r10, r11, [r2, #24]\n\t" +#endif "adcs r8, %[rt], r8\n\t" "adcs r9, r5, r9\n\t" "adcs r10, r6, r10\n\t" @@ -4304,8 +6918,18 @@ void fe_ge_madd(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "subs %[rt], %[rt], r12\n\t" "sbcs r5, r5, r4\n\t" "sbcs r6, r6, r4\n\t" @@ -4314,28 +6938,98 @@ void fe_ge_madd(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "sbcs r9, r9, r4\n\t" "sbcs r10, r10, r4\n\t" "sbc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "ldr r0, [sp, #4]\n\t" "ldr r1, [sp, #72]\n\t" "ldr r2, [sp, #68]\n\t" /* Sub */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2]\n\t" + "ldr r9, [r2, #4]\n\t" +#else "ldrd r8, r9, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #8]\n\t" + "ldr r11, [r2, #12]\n\t" +#else "ldrd r10, r11, [r2, #8]\n\t" +#endif "subs r8, %[rt], r8\n\t" "sbcs r9, r5, r9\n\t" "sbcs r10, r6, r10\n\t" "sbcs r11, r7, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #8]\n\t" + "str r11, [r0, #12]\n\t" +#else "strd r10, r11, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #24]\n\t" + "ldr r7, [r1, #28]\n\t" +#else "ldrd r6, r7, [r1, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2, #16]\n\t" + "ldr r9, [r2, #20]\n\t" +#else "ldrd r8, r9, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #24]\n\t" + "ldr r11, [r2, #28]\n\t" +#else "ldrd r10, r11, [r2, #24]\n\t" +#endif "sbcs r8, %[rt], r8\n\t" "sbcs r9, r5, r9\n\t" "sbcs r10, r6, r10\n\t" @@ -4346,8 +7040,18 @@ void fe_ge_madd(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" "adcs r6, r6, r4\n\t" @@ -4356,10 +7060,30 @@ void fe_ge_madd(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "adcs r9, r9, r4\n\t" "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "ldr r2, [sp, #88]\n\t" "ldr r1, [sp]\n\t" "ldr r0, [sp, #8]\n\t" @@ -4377,54 +7101,124 @@ void fe_ge_madd(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "ldr r2, [sp, #8]\n\t" /* Add-Sub */ /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2]\n\t" + "ldr r5, [r2, #4]\n\t" +#else "ldrd %[rt], r5, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0]\n\t" + "ldr r7, [r0, #4]\n\t" +#else "ldrd r6, r7, [r0]\n\t" +#endif "adds r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif /* Sub */ "subs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1]\n\t" + "str r11, [r1, #4]\n\t" +#else "strd r10, r11, [r1]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #8]\n\t" + "ldr r5, [r2, #12]\n\t" +#else "ldrd %[rt], r5, [r2, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #8]\n\t" + "str r9, [r0, #12]\n\t" +#else "strd r8, r9, [r0, #8]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #8]\n\t" + "str r11, [r1, #12]\n\t" +#else "strd r10, r11, [r1, #8]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #16]\n\t" + "ldr r5, [r2, #20]\n\t" +#else "ldrd %[rt], r5, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #16]\n\t" + "ldr r7, [r0, #20]\n\t" +#else "ldrd r6, r7, [r0, #16]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #16]\n\t" + "str r11, [r1, #20]\n\t" +#else "strd r10, r11, [r1, #16]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #24]\n\t" + "ldr r5, [r2, #28]\n\t" +#else "ldrd %[rt], r5, [r2, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #24]\n\t" + "ldr r7, [r0, #28]\n\t" +#else "ldrd r6, r7, [r0, #24]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "adc r9, r5, r7\n\t" @@ -4438,49 +7232,139 @@ void fe_ge_madd(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif "subs %[rt], %[rt], r12\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #8]\n\t" + "ldr r5, [r0, #12]\n\t" +#else "ldrd %[rt], r5, [r0, #8]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #8]\n\t" + "str r5, [r0, #12]\n\t" +#else "strd %[rt], r5, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #16]\n\t" + "ldr r5, [r0, #20]\n\t" +#else "ldrd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #16]\n\t" + "str r5, [r0, #20]\n\t" +#else "strd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs r8, r8, r4\n\t" "sbc r9, r9, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #24]\n\t" + "str r9, [r0, #28]\n\t" +#else "strd r8, r9, [r0, #24]\n\t" +#endif "mov r12, #-19\n\t" "asr r4, r11, #31\n\t" /* Mask the modulus */ "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1]\n\t" + "str r5, [r1, #4]\n\t" +#else "strd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #8]\n\t" + "ldr r5, [r1, #12]\n\t" +#else "ldrd %[rt], r5, [r1, #8]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #8]\n\t" + "str r5, [r1, #12]\n\t" +#else "strd %[rt], r5, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #16]\n\t" + "str r5, [r1, #20]\n\t" +#else "strd %[rt], r5, [r1, #16]\n\t" +#endif "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #24]\n\t" + "str r11, [r1, #28]\n\t" +#else "strd r10, r11, [r1, #24]\n\t" +#endif "ldr r0, [sp, #8]\n\t" "ldr r1, [sp, #76]\n\t" /* Double */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r1, #16]\n\t" + "ldr r9, [r1, #20]\n\t" +#else "ldrd r8, r9, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r1, #24]\n\t" + "ldr r11, [r1, #28]\n\t" +#else "ldrd r10, r11, [r1, #24]\n\t" +#endif "adds %[rt], %[rt], %[rt]\n\t" "adcs r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" @@ -4503,62 +7387,152 @@ void fe_ge_madd(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "sbcs r9, r9, r4\n\t" "sbcs r10, r10, r4\n\t" "sbc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "ldr r0, [sp, #8]\n\t" "ldr r1, [sp, #12]\n\t" /* Add-Sub */ /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1]\n\t" + "ldr r7, [r1, #4]\n\t" +#else "ldrd r6, r7, [r1]\n\t" +#endif "adds r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif /* Sub */ "subs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1]\n\t" + "str r11, [r1, #4]\n\t" +#else "strd r10, r11, [r1]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #8]\n\t" + "ldr r5, [r0, #12]\n\t" +#else "ldrd %[rt], r5, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #8]\n\t" + "str r9, [r0, #12]\n\t" +#else "strd r8, r9, [r0, #8]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #8]\n\t" + "str r11, [r1, #12]\n\t" +#else "strd r10, r11, [r1, #8]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #16]\n\t" + "ldr r5, [r0, #20]\n\t" +#else "ldrd %[rt], r5, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #16]\n\t" + "ldr r7, [r1, #20]\n\t" +#else "ldrd r6, r7, [r1, #16]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #16]\n\t" + "str r11, [r1, #20]\n\t" +#else "strd r10, r11, [r1, #16]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #24]\n\t" + "ldr r5, [r0, #28]\n\t" +#else "ldrd %[rt], r5, [r0, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #24]\n\t" + "ldr r7, [r1, #28]\n\t" +#else "ldrd r6, r7, [r1, #24]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "adc r9, r5, r7\n\t" @@ -4572,42 +7546,112 @@ void fe_ge_madd(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif "subs %[rt], %[rt], r12\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #8]\n\t" + "ldr r5, [r0, #12]\n\t" +#else "ldrd %[rt], r5, [r0, #8]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #8]\n\t" + "str r5, [r0, #12]\n\t" +#else "strd %[rt], r5, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #16]\n\t" + "ldr r5, [r0, #20]\n\t" +#else "ldrd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #16]\n\t" + "str r5, [r0, #20]\n\t" +#else "strd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs r8, r8, r4\n\t" "sbc r9, r9, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #24]\n\t" + "str r9, [r0, #28]\n\t" +#else "strd r8, r9, [r0, #24]\n\t" +#endif "mov r12, #-19\n\t" "asr r4, r11, #31\n\t" /* Mask the modulus */ "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1]\n\t" + "str r5, [r1, #4]\n\t" +#else "strd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #8]\n\t" + "ldr r5, [r1, #12]\n\t" +#else "ldrd %[rt], r5, [r1, #8]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #8]\n\t" + "str r5, [r1, #12]\n\t" +#else "strd %[rt], r5, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #16]\n\t" + "str r5, [r1, #20]\n\t" +#else "strd %[rt], r5, [r1, #16]\n\t" +#endif "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #24]\n\t" + "str r11, [r1, #28]\n\t" +#else "strd r10, r11, [r1, #24]\n\t" +#endif "add sp, sp, #32\n\t" : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt) : @@ -4646,20 +7690,70 @@ void fe_ge_msub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "ldr r1, [sp, #72]\n\t" "ldr r2, [sp, #68]\n\t" /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2]\n\t" + "ldr r9, [r2, #4]\n\t" +#else "ldrd r8, r9, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #8]\n\t" + "ldr r11, [r2, #12]\n\t" +#else "ldrd r10, r11, [r2, #8]\n\t" +#endif "adds r8, %[rt], r8\n\t" "adcs r9, r5, r9\n\t" "adcs r10, r6, r10\n\t" "adcs r11, r7, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #8]\n\t" + "str r11, [r0, #12]\n\t" +#else "strd r10, r11, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #24]\n\t" + "ldr r7, [r1, #28]\n\t" +#else "ldrd r6, r7, [r1, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2, #16]\n\t" + "ldr r9, [r2, #20]\n\t" +#else "ldrd r8, r9, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #24]\n\t" + "ldr r11, [r2, #28]\n\t" +#else "ldrd r10, r11, [r2, #24]\n\t" +#endif "adcs r8, %[rt], r8\n\t" "adcs r9, r5, r9\n\t" "adcs r10, r6, r10\n\t" @@ -4670,8 +7764,18 @@ void fe_ge_msub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "subs %[rt], %[rt], r12\n\t" "sbcs r5, r5, r4\n\t" "sbcs r6, r6, r4\n\t" @@ -4680,28 +7784,98 @@ void fe_ge_msub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "sbcs r9, r9, r4\n\t" "sbcs r10, r10, r4\n\t" "sbc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "ldr r0, [sp, #4]\n\t" "ldr r1, [sp, #72]\n\t" "ldr r2, [sp, #68]\n\t" /* Sub */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2]\n\t" + "ldr r9, [r2, #4]\n\t" +#else "ldrd r8, r9, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #8]\n\t" + "ldr r11, [r2, #12]\n\t" +#else "ldrd r10, r11, [r2, #8]\n\t" +#endif "subs r8, %[rt], r8\n\t" "sbcs r9, r5, r9\n\t" "sbcs r10, r6, r10\n\t" "sbcs r11, r7, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #8]\n\t" + "str r11, [r0, #12]\n\t" +#else "strd r10, r11, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #24]\n\t" + "ldr r7, [r1, #28]\n\t" +#else "ldrd r6, r7, [r1, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2, #16]\n\t" + "ldr r9, [r2, #20]\n\t" +#else "ldrd r8, r9, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #24]\n\t" + "ldr r11, [r2, #28]\n\t" +#else "ldrd r10, r11, [r2, #24]\n\t" +#endif "sbcs r8, %[rt], r8\n\t" "sbcs r9, r5, r9\n\t" "sbcs r10, r6, r10\n\t" @@ -4712,8 +7886,18 @@ void fe_ge_msub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" "adcs r6, r6, r4\n\t" @@ -4722,10 +7906,30 @@ void fe_ge_msub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "adcs r9, r9, r4\n\t" "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "ldr r2, [sp, #92]\n\t" "ldr r1, [sp]\n\t" "ldr r0, [sp, #8]\n\t" @@ -4743,54 +7947,124 @@ void fe_ge_msub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "ldr r2, [sp, #8]\n\t" /* Add-Sub */ /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2]\n\t" + "ldr r5, [r2, #4]\n\t" +#else "ldrd %[rt], r5, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0]\n\t" + "ldr r7, [r0, #4]\n\t" +#else "ldrd r6, r7, [r0]\n\t" +#endif "adds r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif /* Sub */ "subs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1]\n\t" + "str r11, [r1, #4]\n\t" +#else "strd r10, r11, [r1]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #8]\n\t" + "ldr r5, [r2, #12]\n\t" +#else "ldrd %[rt], r5, [r2, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #8]\n\t" + "str r9, [r0, #12]\n\t" +#else "strd r8, r9, [r0, #8]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #8]\n\t" + "str r11, [r1, #12]\n\t" +#else "strd r10, r11, [r1, #8]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #16]\n\t" + "ldr r5, [r2, #20]\n\t" +#else "ldrd %[rt], r5, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #16]\n\t" + "ldr r7, [r0, #20]\n\t" +#else "ldrd r6, r7, [r0, #16]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #16]\n\t" + "str r11, [r1, #20]\n\t" +#else "strd r10, r11, [r1, #16]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #24]\n\t" + "ldr r5, [r2, #28]\n\t" +#else "ldrd %[rt], r5, [r2, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #24]\n\t" + "ldr r7, [r0, #28]\n\t" +#else "ldrd r6, r7, [r0, #24]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "adc r9, r5, r7\n\t" @@ -4804,49 +8078,139 @@ void fe_ge_msub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif "subs %[rt], %[rt], r12\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #8]\n\t" + "ldr r5, [r0, #12]\n\t" +#else "ldrd %[rt], r5, [r0, #8]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #8]\n\t" + "str r5, [r0, #12]\n\t" +#else "strd %[rt], r5, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #16]\n\t" + "ldr r5, [r0, #20]\n\t" +#else "ldrd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #16]\n\t" + "str r5, [r0, #20]\n\t" +#else "strd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs r8, r8, r4\n\t" "sbc r9, r9, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #24]\n\t" + "str r9, [r0, #28]\n\t" +#else "strd r8, r9, [r0, #24]\n\t" +#endif "mov r12, #-19\n\t" "asr r4, r11, #31\n\t" /* Mask the modulus */ "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1]\n\t" + "str r5, [r1, #4]\n\t" +#else "strd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #8]\n\t" + "ldr r5, [r1, #12]\n\t" +#else "ldrd %[rt], r5, [r1, #8]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #8]\n\t" + "str r5, [r1, #12]\n\t" +#else "strd %[rt], r5, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #16]\n\t" + "str r5, [r1, #20]\n\t" +#else "strd %[rt], r5, [r1, #16]\n\t" +#endif "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #24]\n\t" + "str r11, [r1, #28]\n\t" +#else "strd r10, r11, [r1, #24]\n\t" +#endif "ldr r0, [sp, #8]\n\t" "ldr r1, [sp, #76]\n\t" /* Double */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r1, #16]\n\t" + "ldr r9, [r1, #20]\n\t" +#else "ldrd r8, r9, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r1, #24]\n\t" + "ldr r11, [r1, #28]\n\t" +#else "ldrd r10, r11, [r1, #24]\n\t" +#endif "adds %[rt], %[rt], %[rt]\n\t" "adcs r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" @@ -4869,62 +8233,152 @@ void fe_ge_msub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "sbcs r9, r9, r4\n\t" "sbcs r10, r10, r4\n\t" "sbc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "ldr r0, [sp, #12]\n\t" "ldr r1, [sp, #8]\n\t" /* Add-Sub */ /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0]\n\t" + "ldr r7, [r0, #4]\n\t" +#else "ldrd r6, r7, [r0]\n\t" +#endif "adds r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif /* Sub */ "subs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1]\n\t" + "str r11, [r1, #4]\n\t" +#else "strd r10, r11, [r1]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #8]\n\t" + "ldr r5, [r1, #12]\n\t" +#else "ldrd %[rt], r5, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #8]\n\t" + "str r9, [r0, #12]\n\t" +#else "strd r8, r9, [r0, #8]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #8]\n\t" + "str r11, [r1, #12]\n\t" +#else "strd r10, r11, [r1, #8]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #16]\n\t" + "ldr r7, [r0, #20]\n\t" +#else "ldrd r6, r7, [r0, #16]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #16]\n\t" + "str r11, [r1, #20]\n\t" +#else "strd r10, r11, [r1, #16]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #24]\n\t" + "ldr r5, [r1, #28]\n\t" +#else "ldrd %[rt], r5, [r1, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #24]\n\t" + "ldr r7, [r0, #28]\n\t" +#else "ldrd r6, r7, [r0, #24]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "adc r9, r5, r7\n\t" @@ -4938,42 +8392,112 @@ void fe_ge_msub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif "subs %[rt], %[rt], r12\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #8]\n\t" + "ldr r5, [r0, #12]\n\t" +#else "ldrd %[rt], r5, [r0, #8]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #8]\n\t" + "str r5, [r0, #12]\n\t" +#else "strd %[rt], r5, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #16]\n\t" + "ldr r5, [r0, #20]\n\t" +#else "ldrd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #16]\n\t" + "str r5, [r0, #20]\n\t" +#else "strd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs r8, r8, r4\n\t" "sbc r9, r9, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #24]\n\t" + "str r9, [r0, #28]\n\t" +#else "strd r8, r9, [r0, #24]\n\t" +#endif "mov r12, #-19\n\t" "asr r4, r11, #31\n\t" /* Mask the modulus */ "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1]\n\t" + "str r5, [r1, #4]\n\t" +#else "strd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #8]\n\t" + "ldr r5, [r1, #12]\n\t" +#else "ldrd %[rt], r5, [r1, #8]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #8]\n\t" + "str r5, [r1, #12]\n\t" +#else "strd %[rt], r5, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #16]\n\t" + "str r5, [r1, #20]\n\t" +#else "strd %[rt], r5, [r1, #16]\n\t" +#endif "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #24]\n\t" + "str r11, [r1, #28]\n\t" +#else "strd r10, r11, [r1, #24]\n\t" +#endif "add sp, sp, #32\n\t" : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt) : @@ -5013,20 +8537,70 @@ void fe_ge_add(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "ldr r1, [sp, #136]\n\t" "ldr r2, [sp, #132]\n\t" /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2]\n\t" + "ldr r9, [r2, #4]\n\t" +#else "ldrd r8, r9, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #8]\n\t" + "ldr r11, [r2, #12]\n\t" +#else "ldrd r10, r11, [r2, #8]\n\t" +#endif "adds r8, %[rt], r8\n\t" "adcs r9, r5, r9\n\t" "adcs r10, r6, r10\n\t" "adcs r11, r7, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #8]\n\t" + "str r11, [r0, #12]\n\t" +#else "strd r10, r11, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #24]\n\t" + "ldr r7, [r1, #28]\n\t" +#else "ldrd r6, r7, [r1, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2, #16]\n\t" + "ldr r9, [r2, #20]\n\t" +#else "ldrd r8, r9, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #24]\n\t" + "ldr r11, [r2, #28]\n\t" +#else "ldrd r10, r11, [r2, #24]\n\t" +#endif "adcs r8, %[rt], r8\n\t" "adcs r9, r5, r9\n\t" "adcs r10, r6, r10\n\t" @@ -5037,8 +8611,18 @@ void fe_ge_add(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "subs %[rt], %[rt], r12\n\t" "sbcs r5, r5, r4\n\t" "sbcs r6, r6, r4\n\t" @@ -5047,28 +8631,98 @@ void fe_ge_add(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "sbcs r9, r9, r4\n\t" "sbcs r10, r10, r4\n\t" "sbc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "ldr r0, [sp, #4]\n\t" "ldr r1, [sp, #136]\n\t" "ldr r2, [sp, #132]\n\t" /* Sub */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2]\n\t" + "ldr r9, [r2, #4]\n\t" +#else "ldrd r8, r9, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #8]\n\t" + "ldr r11, [r2, #12]\n\t" +#else "ldrd r10, r11, [r2, #8]\n\t" +#endif "subs r8, %[rt], r8\n\t" "sbcs r9, r5, r9\n\t" "sbcs r10, r6, r10\n\t" "sbcs r11, r7, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #8]\n\t" + "str r11, [r0, #12]\n\t" +#else "strd r10, r11, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #24]\n\t" + "ldr r7, [r1, #28]\n\t" +#else "ldrd r6, r7, [r1, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2, #16]\n\t" + "ldr r9, [r2, #20]\n\t" +#else "ldrd r8, r9, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #24]\n\t" + "ldr r11, [r2, #28]\n\t" +#else "ldrd r10, r11, [r2, #24]\n\t" +#endif "sbcs r8, %[rt], r8\n\t" "sbcs r9, r5, r9\n\t" "sbcs r10, r6, r10\n\t" @@ -5079,8 +8733,18 @@ void fe_ge_add(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" "adcs r6, r6, r4\n\t" @@ -5089,10 +8753,30 @@ void fe_ge_add(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "adcs r9, r9, r4\n\t" "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "ldr r2, [sp, #156]\n\t" "ldr r1, [sp]\n\t" "ldr r0, [sp, #8]\n\t" @@ -5112,10 +8796,30 @@ void fe_ge_add(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "add r0, sp, #16\n\t" "ldr r1, [sp]\n\t" /* Double */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r1, #16]\n\t" + "ldr r9, [r1, #20]\n\t" +#else "ldrd r8, r9, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r1, #24]\n\t" + "ldr r11, [r1, #28]\n\t" +#else "ldrd r10, r11, [r1, #24]\n\t" +#endif "adds %[rt], %[rt], %[rt]\n\t" "adcs r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" @@ -5138,63 +8842,153 @@ void fe_ge_add(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "sbcs r9, r9, r4\n\t" "sbcs r10, r10, r4\n\t" "sbc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "ldr r0, [sp, #4]\n\t" "ldr r1, [sp]\n\t" "ldr r2, [sp, #8]\n\t" /* Add-Sub */ /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2]\n\t" + "ldr r5, [r2, #4]\n\t" +#else "ldrd %[rt], r5, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0]\n\t" + "ldr r7, [r0, #4]\n\t" +#else "ldrd r6, r7, [r0]\n\t" +#endif "adds r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif /* Sub */ "subs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1]\n\t" + "str r11, [r1, #4]\n\t" +#else "strd r10, r11, [r1]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #8]\n\t" + "ldr r5, [r2, #12]\n\t" +#else "ldrd %[rt], r5, [r2, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #8]\n\t" + "str r9, [r0, #12]\n\t" +#else "strd r8, r9, [r0, #8]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #8]\n\t" + "str r11, [r1, #12]\n\t" +#else "strd r10, r11, [r1, #8]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #16]\n\t" + "ldr r5, [r2, #20]\n\t" +#else "ldrd %[rt], r5, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #16]\n\t" + "ldr r7, [r0, #20]\n\t" +#else "ldrd r6, r7, [r0, #16]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #16]\n\t" + "str r11, [r1, #20]\n\t" +#else "strd r10, r11, [r1, #16]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #24]\n\t" + "ldr r5, [r2, #28]\n\t" +#else "ldrd %[rt], r5, [r2, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #24]\n\t" + "ldr r7, [r0, #28]\n\t" +#else "ldrd r6, r7, [r0, #24]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "adc r9, r5, r7\n\t" @@ -5208,95 +9002,235 @@ void fe_ge_add(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif "subs %[rt], %[rt], r12\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #8]\n\t" + "ldr r5, [r0, #12]\n\t" +#else "ldrd %[rt], r5, [r0, #8]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #8]\n\t" + "str r5, [r0, #12]\n\t" +#else "strd %[rt], r5, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #16]\n\t" + "ldr r5, [r0, #20]\n\t" +#else "ldrd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #16]\n\t" + "str r5, [r0, #20]\n\t" +#else "strd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs r8, r8, r4\n\t" "sbc r9, r9, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #24]\n\t" + "str r9, [r0, #28]\n\t" +#else "strd r8, r9, [r0, #24]\n\t" +#endif "mov r12, #-19\n\t" "asr r4, r11, #31\n\t" /* Mask the modulus */ "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1]\n\t" + "str r5, [r1, #4]\n\t" +#else "strd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #8]\n\t" + "ldr r5, [r1, #12]\n\t" +#else "ldrd %[rt], r5, [r1, #8]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #8]\n\t" + "str r5, [r1, #12]\n\t" +#else "strd %[rt], r5, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #16]\n\t" + "str r5, [r1, #20]\n\t" +#else "strd %[rt], r5, [r1, #16]\n\t" +#endif "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #24]\n\t" + "str r11, [r1, #28]\n\t" +#else "strd r10, r11, [r1, #24]\n\t" +#endif "ldr r0, [sp, #8]\n\t" "ldr r1, [sp, #12]\n\t" "add r2, sp, #16\n\t" /* Add-Sub */ /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2]\n\t" + "ldr r5, [r2, #4]\n\t" +#else "ldrd %[rt], r5, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1]\n\t" + "ldr r7, [r1, #4]\n\t" +#else "ldrd r6, r7, [r1]\n\t" +#endif "adds r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif /* Sub */ "subs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1]\n\t" + "str r11, [r1, #4]\n\t" +#else "strd r10, r11, [r1]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #8]\n\t" + "ldr r5, [r2, #12]\n\t" +#else "ldrd %[rt], r5, [r2, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #8]\n\t" + "str r9, [r0, #12]\n\t" +#else "strd r8, r9, [r0, #8]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #8]\n\t" + "str r11, [r1, #12]\n\t" +#else "strd r10, r11, [r1, #8]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #16]\n\t" + "ldr r5, [r2, #20]\n\t" +#else "ldrd %[rt], r5, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #16]\n\t" + "ldr r7, [r1, #20]\n\t" +#else "ldrd r6, r7, [r1, #16]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #16]\n\t" + "str r11, [r1, #20]\n\t" +#else "strd r10, r11, [r1, #16]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #24]\n\t" + "ldr r5, [r2, #28]\n\t" +#else "ldrd %[rt], r5, [r2, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #24]\n\t" + "ldr r7, [r1, #28]\n\t" +#else "ldrd r6, r7, [r1, #24]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "adc r9, r5, r7\n\t" @@ -5310,42 +9244,112 @@ void fe_ge_add(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif "subs %[rt], %[rt], r12\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #8]\n\t" + "ldr r5, [r0, #12]\n\t" +#else "ldrd %[rt], r5, [r0, #8]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #8]\n\t" + "str r5, [r0, #12]\n\t" +#else "strd %[rt], r5, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #16]\n\t" + "ldr r5, [r0, #20]\n\t" +#else "ldrd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #16]\n\t" + "str r5, [r0, #20]\n\t" +#else "strd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs r8, r8, r4\n\t" "sbc r9, r9, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #24]\n\t" + "str r9, [r0, #28]\n\t" +#else "strd r8, r9, [r0, #24]\n\t" +#endif "mov r12, #-19\n\t" "asr r4, r11, #31\n\t" /* Mask the modulus */ "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1]\n\t" + "str r5, [r1, #4]\n\t" +#else "strd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #8]\n\t" + "ldr r5, [r1, #12]\n\t" +#else "ldrd %[rt], r5, [r1, #8]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #8]\n\t" + "str r5, [r1, #12]\n\t" +#else "strd %[rt], r5, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #16]\n\t" + "str r5, [r1, #20]\n\t" +#else "strd %[rt], r5, [r1, #16]\n\t" +#endif "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #24]\n\t" + "str r11, [r1, #28]\n\t" +#else "strd r10, r11, [r1, #24]\n\t" +#endif "add sp, sp, #0x60\n\t" : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt) : @@ -5386,20 +9390,70 @@ void fe_ge_sub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "ldr r1, [sp, #136]\n\t" "ldr r2, [sp, #132]\n\t" /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2]\n\t" + "ldr r9, [r2, #4]\n\t" +#else "ldrd r8, r9, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #8]\n\t" + "ldr r11, [r2, #12]\n\t" +#else "ldrd r10, r11, [r2, #8]\n\t" +#endif "adds r8, %[rt], r8\n\t" "adcs r9, r5, r9\n\t" "adcs r10, r6, r10\n\t" "adcs r11, r7, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #8]\n\t" + "str r11, [r0, #12]\n\t" +#else "strd r10, r11, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #24]\n\t" + "ldr r7, [r1, #28]\n\t" +#else "ldrd r6, r7, [r1, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2, #16]\n\t" + "ldr r9, [r2, #20]\n\t" +#else "ldrd r8, r9, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #24]\n\t" + "ldr r11, [r2, #28]\n\t" +#else "ldrd r10, r11, [r2, #24]\n\t" +#endif "adcs r8, %[rt], r8\n\t" "adcs r9, r5, r9\n\t" "adcs r10, r6, r10\n\t" @@ -5410,8 +9464,18 @@ void fe_ge_sub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "subs %[rt], %[rt], r12\n\t" "sbcs r5, r5, r4\n\t" "sbcs r6, r6, r4\n\t" @@ -5420,28 +9484,98 @@ void fe_ge_sub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "sbcs r9, r9, r4\n\t" "sbcs r10, r10, r4\n\t" "sbc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "ldr r0, [sp, #4]\n\t" "ldr r1, [sp, #136]\n\t" "ldr r2, [sp, #132]\n\t" /* Sub */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2]\n\t" + "ldr r9, [r2, #4]\n\t" +#else "ldrd r8, r9, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #8]\n\t" + "ldr r11, [r2, #12]\n\t" +#else "ldrd r10, r11, [r2, #8]\n\t" +#endif "subs r8, %[rt], r8\n\t" "sbcs r9, r5, r9\n\t" "sbcs r10, r6, r10\n\t" "sbcs r11, r7, r11\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #8]\n\t" + "str r11, [r0, #12]\n\t" +#else "strd r10, r11, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #24]\n\t" + "ldr r7, [r1, #28]\n\t" +#else "ldrd r6, r7, [r1, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r2, #16]\n\t" + "ldr r9, [r2, #20]\n\t" +#else "ldrd r8, r9, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r2, #24]\n\t" + "ldr r11, [r2, #28]\n\t" +#else "ldrd r10, r11, [r2, #24]\n\t" +#endif "sbcs r8, %[rt], r8\n\t" "sbcs r9, r5, r9\n\t" "sbcs r10, r6, r10\n\t" @@ -5452,8 +9586,18 @@ void fe_ge_sub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" "adcs r6, r6, r4\n\t" @@ -5462,10 +9606,30 @@ void fe_ge_sub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "adcs r9, r9, r4\n\t" "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "ldr r2, [sp, #160]\n\t" "ldr r1, [sp]\n\t" "ldr r0, [sp, #8]\n\t" @@ -5485,10 +9649,30 @@ void fe_ge_sub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "add r0, sp, #16\n\t" "ldr r1, [sp]\n\t" /* Double */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r1, #8]\n\t" + "ldr r7, [r1, #12]\n\t" +#else "ldrd r6, r7, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [r1, #16]\n\t" + "ldr r9, [r1, #20]\n\t" +#else "ldrd r8, r9, [r1, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r10, [r1, #24]\n\t" + "ldr r11, [r1, #28]\n\t" +#else "ldrd r10, r11, [r1, #24]\n\t" +#endif "adds %[rt], %[rt], %[rt]\n\t" "adcs r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" @@ -5511,63 +9695,153 @@ void fe_ge_sub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "sbcs r9, r9, r4\n\t" "sbcs r10, r10, r4\n\t" "sbc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r0, #8]\n\t" + "str r7, [r0, #12]\n\t" +#else "strd r6, r7, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r0, #24]\n\t" + "str r11, [r0, #28]\n\t" +#else "strd r10, r11, [r0, #24]\n\t" +#endif "ldr r0, [sp, #4]\n\t" "ldr r1, [sp]\n\t" "ldr r2, [sp, #8]\n\t" /* Add-Sub */ /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2]\n\t" + "ldr r5, [r2, #4]\n\t" +#else "ldrd %[rt], r5, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0]\n\t" + "ldr r7, [r0, #4]\n\t" +#else "ldrd r6, r7, [r0]\n\t" +#endif "adds r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif /* Sub */ "subs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1]\n\t" + "str r11, [r1, #4]\n\t" +#else "strd r10, r11, [r1]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #8]\n\t" + "ldr r5, [r2, #12]\n\t" +#else "ldrd %[rt], r5, [r2, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #8]\n\t" + "str r9, [r0, #12]\n\t" +#else "strd r8, r9, [r0, #8]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #8]\n\t" + "str r11, [r1, #12]\n\t" +#else "strd r10, r11, [r1, #8]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #16]\n\t" + "ldr r5, [r2, #20]\n\t" +#else "ldrd %[rt], r5, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #16]\n\t" + "ldr r7, [r0, #20]\n\t" +#else "ldrd r6, r7, [r0, #16]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #16]\n\t" + "str r11, [r1, #20]\n\t" +#else "strd r10, r11, [r1, #16]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #24]\n\t" + "ldr r5, [r2, #28]\n\t" +#else "ldrd %[rt], r5, [r2, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #24]\n\t" + "ldr r7, [r0, #28]\n\t" +#else "ldrd r6, r7, [r0, #24]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "adc r9, r5, r7\n\t" @@ -5581,95 +9855,235 @@ void fe_ge_sub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif "subs %[rt], %[rt], r12\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #8]\n\t" + "ldr r5, [r0, #12]\n\t" +#else "ldrd %[rt], r5, [r0, #8]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #8]\n\t" + "str r5, [r0, #12]\n\t" +#else "strd %[rt], r5, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #16]\n\t" + "ldr r5, [r0, #20]\n\t" +#else "ldrd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #16]\n\t" + "str r5, [r0, #20]\n\t" +#else "strd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs r8, r8, r4\n\t" "sbc r9, r9, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #24]\n\t" + "str r9, [r0, #28]\n\t" +#else "strd r8, r9, [r0, #24]\n\t" +#endif "mov r12, #-19\n\t" "asr r4, r11, #31\n\t" /* Mask the modulus */ "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1]\n\t" + "str r5, [r1, #4]\n\t" +#else "strd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #8]\n\t" + "ldr r5, [r1, #12]\n\t" +#else "ldrd %[rt], r5, [r1, #8]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #8]\n\t" + "str r5, [r1, #12]\n\t" +#else "strd %[rt], r5, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #16]\n\t" + "str r5, [r1, #20]\n\t" +#else "strd %[rt], r5, [r1, #16]\n\t" +#endif "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #24]\n\t" + "str r11, [r1, #28]\n\t" +#else "strd r10, r11, [r1, #24]\n\t" +#endif "ldr r0, [sp, #12]\n\t" "ldr r1, [sp, #8]\n\t" "add r2, sp, #16\n\t" /* Add-Sub */ /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2]\n\t" + "ldr r5, [r2, #4]\n\t" +#else "ldrd %[rt], r5, [r2]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0]\n\t" + "ldr r7, [r0, #4]\n\t" +#else "ldrd r6, r7, [r0]\n\t" +#endif "adds r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0]\n\t" + "str r9, [r0, #4]\n\t" +#else "strd r8, r9, [r0]\n\t" +#endif /* Sub */ "subs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1]\n\t" + "str r11, [r1, #4]\n\t" +#else "strd r10, r11, [r1]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #8]\n\t" + "ldr r5, [r2, #12]\n\t" +#else "ldrd %[rt], r5, [r2, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #8]\n\t" + "ldr r7, [r0, #12]\n\t" +#else "ldrd r6, r7, [r0, #8]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #8]\n\t" + "str r9, [r0, #12]\n\t" +#else "strd r8, r9, [r0, #8]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #8]\n\t" + "str r11, [r1, #12]\n\t" +#else "strd r10, r11, [r1, #8]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #16]\n\t" + "ldr r5, [r2, #20]\n\t" +#else "ldrd %[rt], r5, [r2, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #16]\n\t" + "ldr r7, [r0, #20]\n\t" +#else "ldrd r6, r7, [r0, #16]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "mov r12, #0\n\t" "adcs r9, r5, r7\n\t" "adc r12, r12, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #16]\n\t" + "str r9, [r0, #20]\n\t" +#else "strd r8, r9, [r0, #16]\n\t" +#endif /* Sub */ "adds lr, lr, #-1\n\t" "sbcs r10, %[rt], r6\n\t" "mov lr, #0\n\t" "sbcs r11, r5, r7\n\t" "adc lr, lr, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #16]\n\t" + "str r11, [r1, #20]\n\t" +#else "strd r10, r11, [r1, #16]\n\t" +#endif /* Add */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r2, #24]\n\t" + "ldr r5, [r2, #28]\n\t" +#else "ldrd %[rt], r5, [r2, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [r0, #24]\n\t" + "ldr r7, [r0, #28]\n\t" +#else "ldrd r6, r7, [r0, #24]\n\t" +#endif "adds r12, r12, #-1\n\t" "adcs r8, %[rt], r6\n\t" "adc r9, r5, r7\n\t" @@ -5683,42 +10097,112 @@ void fe_ge_sub(fe rx_p, fe ry_p, fe rz_p, fe rt_p, const fe px_p, const fe py_p, "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Sub modulus (if overflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0]\n\t" + "ldr r5, [r0, #4]\n\t" +#else "ldrd %[rt], r5, [r0]\n\t" +#endif "subs %[rt], %[rt], r12\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0]\n\t" + "str r5, [r0, #4]\n\t" +#else "strd %[rt], r5, [r0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #8]\n\t" + "ldr r5, [r0, #12]\n\t" +#else "ldrd %[rt], r5, [r0, #8]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #8]\n\t" + "str r5, [r0, #12]\n\t" +#else "strd %[rt], r5, [r0, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r0, #16]\n\t" + "ldr r5, [r0, #20]\n\t" +#else "ldrd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs %[rt], %[rt], r4\n\t" "sbcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r0, #16]\n\t" + "str r5, [r0, #20]\n\t" +#else "strd %[rt], r5, [r0, #16]\n\t" +#endif "sbcs r8, r8, r4\n\t" "sbc r9, r9, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r0, #24]\n\t" + "str r9, [r0, #28]\n\t" +#else "strd r8, r9, [r0, #24]\n\t" +#endif "mov r12, #-19\n\t" "asr r4, r11, #31\n\t" /* Mask the modulus */ "and r12, r4, r12\n\t" "and lr, r4, #0x7fffffff\n\t" /* Add modulus (if underflow) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1]\n\t" + "ldr r5, [r1, #4]\n\t" +#else "ldrd %[rt], r5, [r1]\n\t" +#endif "adds %[rt], %[rt], r12\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1]\n\t" + "str r5, [r1, #4]\n\t" +#else "strd %[rt], r5, [r1]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #8]\n\t" + "ldr r5, [r1, #12]\n\t" +#else "ldrd %[rt], r5, [r1, #8]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #8]\n\t" + "str r5, [r1, #12]\n\t" +#else "strd %[rt], r5, [r1, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[rt], [r1, #16]\n\t" + "ldr r5, [r1, #20]\n\t" +#else "ldrd %[rt], r5, [r1, #16]\n\t" +#endif "adcs %[rt], %[rt], r4\n\t" "adcs r5, r5, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[rt], [r1, #16]\n\t" + "str r5, [r1, #20]\n\t" +#else "strd %[rt], r5, [r1, #16]\n\t" +#endif "adcs r10, r10, r4\n\t" "adc r11, r11, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [r1, #24]\n\t" + "str r11, [r1, #28]\n\t" +#else "strd r10, r11, [r1, #24]\n\t" +#endif "add sp, sp, #0x60\n\t" : [rx] "+r" (rx), [ry] "+r" (ry), [rz] "+r" (rz), [rt] "+r" (rt) : diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S index 6814bdd4d..92b9cce83 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S @@ -108,24 +108,81 @@ Transform_Sha256_Len: sub sp, sp, #0xc0 adr r3, L_SHA256_transform_len_k # Copy digest to add in at end +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [r0, #16] - ldrd r8, r9, [r0, #24] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #64] str lr, [sp, #68] - strd r4, r5, [sp, #72] - strd r6, r7, [sp, #80] - strd r8, r9, [sp, #88] +#else + strd r12, lr, [sp, #64] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #72] + str r5, [sp, #76] +#else + strd r4, r5, [sp, #72] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #80] + str r7, [sp, #84] +#else + strd r6, r7, [sp, #80] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #88] + str r9, [sp, #92] +#else + strd r8, r9, [sp, #88] +#endif # Start of loop processing a block L_SHA256_transform_len_begin: # Load, Reverse and Store W - 64 bytes +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1] ldr lr, [r1, #4] - ldrd r4, r5, [r1, #8] - ldrd r6, r7, [r1, #16] - ldrd r8, r9, [r1, #24] +#else + ldrd r12, lr, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r1, #8] + ldr r5, [r1, #12] +#else + ldrd r4, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #16] + ldr r7, [r1, #20] +#else + ldrd r6, r7, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #24] + ldr r9, [r1, #28] +#else + ldrd r8, r9, [r1, #24] +#endif rev r12, r12 rev lr, lr rev r4, r4 @@ -134,16 +191,54 @@ L_SHA256_transform_len_begin: rev r7, r7 rev r8, r8 rev r9, r9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp] str lr, [sp, #4] - strd r4, r5, [sp, #8] - strd r6, r7, [sp, #16] - strd r8, r9, [sp, #24] +#else + strd r12, lr, [sp] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #8] + str r5, [sp, #12] +#else + strd r4, r5, [sp, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #16] + str r7, [sp, #20] +#else + strd r6, r7, [sp, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #24] + str r9, [sp, #28] +#else + strd r8, r9, [sp, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1, #32] ldr lr, [r1, #36] - ldrd r4, r5, [r1, #40] - ldrd r6, r7, [r1, #48] - ldrd r8, r9, [r1, #56] +#else + ldrd r12, lr, [r1, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r1, #40] + ldr r5, [r1, #44] +#else + ldrd r4, r5, [r1, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #48] + ldr r7, [r1, #52] +#else + ldrd r6, r7, [r1, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #56] + ldr r9, [r1, #60] +#else + ldrd r8, r9, [r1, #56] +#endif rev r12, r12 rev lr, lr rev r4, r4 @@ -152,11 +247,30 @@ L_SHA256_transform_len_begin: rev r7, r7 rev r8, r8 rev r9, r9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #32] str lr, [sp, #36] - strd r4, r5, [sp, #40] - strd r6, r7, [sp, #48] - strd r8, r9, [sp, #56] +#else + strd r12, lr, [sp, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #40] + str r5, [sp, #44] +#else + strd r4, r5, [sp, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #48] + str r7, [sp, #52] +#else + strd r6, r7, [sp, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #56] + str r9, [sp, #60] +#else + strd r8, r9, [sp, #56] +#endif ldr r9, [r0, #4] ldr r12, [r0, #8] eor r9, r9, r12 @@ -1431,36 +1545,110 @@ L_SHA256_transform_len_start: str r6, [r0, #16] str r7, [r0] # Add in digest from start +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [sp, #64] - ldrd r8, r9, [sp, #72] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #64] + ldr r7, [sp, #68] +#else + ldrd r6, r7, [sp, #64] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [sp, #72] + ldr r9, [sp, #76] +#else + ldrd r8, r9, [sp, #72] +#endif add r12, r12, r6 add lr, lr, r7 add r4, r4, r8 add r5, r5, r9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] - strd r4, r5, [r0, #8] +#else + strd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #64] str lr, [sp, #68] - strd r4, r5, [sp, #72] +#else + strd r12, lr, [sp, #64] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #72] + str r5, [sp, #76] +#else + strd r4, r5, [sp, #72] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] - ldrd r4, r5, [r0, #24] - ldrd r6, r7, [sp, #80] - ldrd r8, r9, [sp, #88] +#else + ldrd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #80] + ldr r7, [sp, #84] +#else + ldrd r6, r7, [sp, #80] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [sp, #88] + ldr r9, [sp, #92] +#else + ldrd r8, r9, [sp, #88] +#endif add r12, r12, r6 add lr, lr, r7 add r4, r4, r8 add r5, r5, r9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #16] str lr, [r0, #20] - strd r4, r5, [r0, #24] +#else + strd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #80] str lr, [sp, #84] - strd r4, r5, [sp, #88] +#else + strd r12, lr, [sp, #80] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #88] + str r5, [sp, #92] +#else + strd r4, r5, [sp, #88] +#endif subs r2, r2, #0x40 sub r3, r3, #0xc0 add r1, r1, #0x40 @@ -1548,14 +1736,39 @@ Transform_Sha256_Len: push {r4, r5, r6, r7, r8, r9, r10, lr} vpush {d8-d11} sub sp, sp, #24 - strd r0, r1, [sp] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r0, [sp] + str r1, [sp, #4] +#else + strd r0, r1, [sp] +#endif str r2, [sp, #8] adr r12, L_SHA256_transform_neon_len_k # Load digest into registers - ldrd r2, r3, [r0] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [r0, #16] - ldrd r8, r9, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r2, [r0] + ldr r3, [r0, #4] +#else + ldrd r2, r3, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif # Start of loop processing a block L_SHA256_transform_neon_len_begin: # Load W @@ -2481,22 +2694,62 @@ L_SHA256_transform_neon_len_start: add r2, r2, r1 ldr r10, [sp] # Add in digest from start - ldrd r0, r1, [r10] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r0, [r10] + ldr r1, [r10, #4] +#else + ldrd r0, r1, [r10] +#endif add r2, r2, r0 add r3, r3, r1 - strd r2, r3, [r10] - ldrd r0, r1, [r10, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r2, [r10] + str r3, [r10, #4] +#else + strd r2, r3, [r10] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r0, [r10, #8] + ldr r1, [r10, #12] +#else + ldrd r0, r1, [r10, #8] +#endif add r4, r4, r0 add r5, r5, r1 - strd r4, r5, [r10, #8] - ldrd r0, r1, [r10, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r10, #8] + str r5, [r10, #12] +#else + strd r4, r5, [r10, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r0, [r10, #16] + ldr r1, [r10, #20] +#else + ldrd r0, r1, [r10, #16] +#endif add r6, r6, r0 add r7, r7, r1 - strd r6, r7, [r10, #16] - ldrd r0, r1, [r10, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r10, #16] + str r7, [r10, #20] +#else + strd r6, r7, [r10, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r0, [r10, #24] + ldr r1, [r10, #28] +#else + ldrd r0, r1, [r10, #24] +#endif add r8, r8, r0 add r9, r9, r1 - strd r8, r9, [r10, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [r10, #24] + str r9, [r10, #28] +#else + strd r8, r9, [r10, #24] +#endif ldr r10, [sp, #8] ldr r1, [sp, #4] subs r10, r10, #0x40 diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c index e211af660..5d6981e15 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c @@ -115,22 +115,82 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "sub sp, sp, #0xc0\n\t" "mov r3, %[L_SHA256_transform_len_k]\n\t" /* Copy digest to add in at end */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha256]]\n\t" + "ldr lr, [%[sha256], #4]\n\t" +#else "ldrd r12, lr, [%[sha256]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha256], #8]\n\t" + "ldr r5, [%[sha256], #12]\n\t" +#else "ldrd r4, r5, [%[sha256], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha256], #16]\n\t" + "ldr r7, [%[sha256], #20]\n\t" +#else "ldrd r6, r7, [%[sha256], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[sha256], #24]\n\t" + "ldr r9, [%[sha256], #28]\n\t" +#else "ldrd r8, r9, [%[sha256], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #64]\n\t" + "str lr, [sp, #68]\n\t" +#else "strd r12, lr, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #72]\n\t" + "str r5, [sp, #76]\n\t" +#else "strd r4, r5, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #80]\n\t" + "str r7, [sp, #84]\n\t" +#else "strd r6, r7, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #88]\n\t" + "str r9, [sp, #92]\n\t" +#else "strd r8, r9, [sp, #88]\n\t" +#endif /* Start of loop processing a block */ "\n" "L_SHA256_transform_len_begin_%=: \n\t" /* Load, Reverse and Store W - 64 bytes */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[data]]\n\t" + "ldr lr, [%[data], #4]\n\t" +#else "ldrd r12, lr, [%[data]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[data], #8]\n\t" + "ldr r5, [%[data], #12]\n\t" +#else "ldrd r4, r5, [%[data], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[data], #16]\n\t" + "ldr r7, [%[data], #20]\n\t" +#else "ldrd r6, r7, [%[data], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[data], #24]\n\t" + "ldr r9, [%[data], #28]\n\t" +#else "ldrd r8, r9, [%[data], #24]\n\t" +#endif "rev r12, r12\n\t" "rev lr, lr\n\t" "rev r4, r4\n\t" @@ -139,14 +199,54 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "rev r7, r7\n\t" "rev r8, r8\n\t" "rev r9, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp]\n\t" + "str lr, [sp, #4]\n\t" +#else "strd r12, lr, [sp]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #8]\n\t" + "str r5, [sp, #12]\n\t" +#else "strd r4, r5, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #16]\n\t" + "str r7, [sp, #20]\n\t" +#else "strd r6, r7, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #24]\n\t" + "str r9, [sp, #28]\n\t" +#else "strd r8, r9, [sp, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[data], #32]\n\t" + "ldr lr, [%[data], #36]\n\t" +#else "ldrd r12, lr, [%[data], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[data], #40]\n\t" + "ldr r5, [%[data], #44]\n\t" +#else "ldrd r4, r5, [%[data], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[data], #48]\n\t" + "ldr r7, [%[data], #52]\n\t" +#else "ldrd r6, r7, [%[data], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[data], #56]\n\t" + "ldr r9, [%[data], #60]\n\t" +#else "ldrd r8, r9, [%[data], #56]\n\t" +#endif "rev r12, r12\n\t" "rev lr, lr\n\t" "rev r4, r4\n\t" @@ -155,10 +255,30 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "rev r7, r7\n\t" "rev r8, r8\n\t" "rev r9, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #32]\n\t" + "str lr, [sp, #36]\n\t" +#else "strd r12, lr, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #40]\n\t" + "str r5, [sp, #44]\n\t" +#else "strd r4, r5, [sp, #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #48]\n\t" + "str r7, [sp, #52]\n\t" +#else "strd r6, r7, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #56]\n\t" + "str r9, [sp, #60]\n\t" +#else "strd r8, r9, [sp, #56]\n\t" +#endif "ldr r9, [%[sha256], #4]\n\t" "ldr r12, [%[sha256], #8]\n\t" "eor r9, r9, r12\n\t" @@ -1434,30 +1554,110 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "str r6, [%[sha256], #16]\n\t" "str r7, [%[sha256]]\n\t" /* Add in digest from start */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha256]]\n\t" + "ldr lr, [%[sha256], #4]\n\t" +#else "ldrd r12, lr, [%[sha256]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha256], #8]\n\t" + "ldr r5, [%[sha256], #12]\n\t" +#else "ldrd r4, r5, [%[sha256], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #64]\n\t" + "ldr r7, [sp, #68]\n\t" +#else "ldrd r6, r7, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [sp, #72]\n\t" + "ldr r9, [sp, #76]\n\t" +#else "ldrd r8, r9, [sp, #72]\n\t" +#endif "add r12, r12, r6\n\t" "add lr, lr, r7\n\t" "add r4, r4, r8\n\t" "add r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha256]]\n\t" + "str lr, [%[sha256], #4]\n\t" +#else "strd r12, lr, [%[sha256]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha256], #8]\n\t" + "str r5, [%[sha256], #12]\n\t" +#else "strd r4, r5, [%[sha256], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #64]\n\t" + "str lr, [sp, #68]\n\t" +#else "strd r12, lr, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #72]\n\t" + "str r5, [sp, #76]\n\t" +#else "strd r4, r5, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha256], #16]\n\t" + "ldr lr, [%[sha256], #20]\n\t" +#else "ldrd r12, lr, [%[sha256], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha256], #24]\n\t" + "ldr r5, [%[sha256], #28]\n\t" +#else "ldrd r4, r5, [%[sha256], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #80]\n\t" + "ldr r7, [sp, #84]\n\t" +#else "ldrd r6, r7, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [sp, #88]\n\t" + "ldr r9, [sp, #92]\n\t" +#else "ldrd r8, r9, [sp, #88]\n\t" +#endif "add r12, r12, r6\n\t" "add lr, lr, r7\n\t" "add r4, r4, r8\n\t" "add r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha256], #16]\n\t" + "str lr, [%[sha256], #20]\n\t" +#else "strd r12, lr, [%[sha256], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha256], #24]\n\t" + "str r5, [%[sha256], #28]\n\t" +#else "strd r4, r5, [%[sha256], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #80]\n\t" + "str lr, [sp, #84]\n\t" +#else "strd r12, lr, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #88]\n\t" + "str r5, [sp, #92]\n\t" +#else "strd r4, r5, [sp, #88]\n\t" +#endif "subs %[len], %[len], #0x40\n\t" "sub r3, r3, #0xc0\n\t" "add %[data], %[data], #0x40\n\t" @@ -1549,14 +1749,39 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) __asm__ __volatile__ ( "sub sp, sp, #24\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[sha256], [sp]\n\t" + "str %[data], [sp, #4]\n\t" +#else "strd %[sha256], %[data], [sp]\n\t" +#endif "str %[len], [sp, #8]\n\t" "mov r12, %[L_SHA256_transform_neon_len_k]\n\t" /* Load digest into registers */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[len], [%[sha256]]\n\t" + "ldr r3, [%[sha256], #4]\n\t" +#else "ldrd %[len], r3, [%[sha256]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha256], #8]\n\t" + "ldr r5, [%[sha256], #12]\n\t" +#else "ldrd r4, r5, [%[sha256], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha256], #16]\n\t" + "ldr r7, [%[sha256], #20]\n\t" +#else "ldrd r6, r7, [%[sha256], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[sha256], #24]\n\t" + "ldr r9, [%[sha256], #28]\n\t" +#else "ldrd r8, r9, [%[sha256], #24]\n\t" +#endif /* Start of loop processing a block */ "\n" "L_SHA256_transform_neon_len_begin_%=: \n\t" @@ -2484,22 +2709,62 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "add %[len], %[len], %[data]\n\t" "ldr r10, [sp]\n\t" /* Add in digest from start */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[sha256], [r10]\n\t" + "ldr %[data], [r10, #4]\n\t" +#else "ldrd %[sha256], %[data], [r10]\n\t" +#endif "add %[len], %[len], %[sha256]\n\t" "add r3, r3, %[data]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str %[len], [r10]\n\t" + "str r3, [r10, #4]\n\t" +#else "strd %[len], r3, [r10]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[sha256], [r10, #8]\n\t" + "ldr %[data], [r10, #12]\n\t" +#else "ldrd %[sha256], %[data], [r10, #8]\n\t" +#endif "add r4, r4, %[sha256]\n\t" "add r5, r5, %[data]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [r10, #8]\n\t" + "str r5, [r10, #12]\n\t" +#else "strd r4, r5, [r10, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[sha256], [r10, #16]\n\t" + "ldr %[data], [r10, #20]\n\t" +#else "ldrd %[sha256], %[data], [r10, #16]\n\t" +#endif "add r6, r6, %[sha256]\n\t" "add r7, r7, %[data]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [r10, #16]\n\t" + "str r7, [r10, #20]\n\t" +#else "strd r6, r7, [r10, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr %[sha256], [r10, #24]\n\t" + "ldr %[data], [r10, #28]\n\t" +#else "ldrd %[sha256], %[data], [r10, #24]\n\t" +#endif "add r8, r8, %[sha256]\n\t" "add r9, r9, %[data]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [r10, #24]\n\t" + "str r9, [r10, #28]\n\t" +#else "strd r8, r9, [r10, #24]\n\t" +#endif "ldr r10, [sp, #8]\n\t" "ldr %[data], [sp, #4]\n\t" "subs r10, r10, #0x40\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S index 0dd0c6b36..2a731b92d 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S @@ -204,34 +204,129 @@ Transform_Sha512_Len: sub sp, sp, #0xc0 adr r3, L_SHA512_transform_len_k # Copy digest to add in at end +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [r0, #16] - ldrd r8, r9, [r0, #24] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#else + ldrd r8, r9, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #128] str lr, [sp, #132] - strd r4, r5, [sp, #136] - strd r6, r7, [sp, #144] - strd r8, r9, [sp, #152] +#else + strd r12, lr, [sp, #128] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #136] + str r5, [sp, #140] +#else + strd r4, r5, [sp, #136] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #144] + str r7, [sp, #148] +#else + strd r6, r7, [sp, #144] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #152] + str r9, [sp, #156] +#else + strd r8, r9, [sp, #152] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] - ldrd r4, r5, [r0, #40] - ldrd r6, r7, [r0, #48] - ldrd r8, r9, [r0, #56] +#else + ldrd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r0, #56] + ldr r9, [r0, #60] +#else + ldrd r8, r9, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #160] str lr, [sp, #164] - strd r4, r5, [sp, #168] - strd r6, r7, [sp, #176] - strd r8, r9, [sp, #184] +#else + strd r12, lr, [sp, #160] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #168] + str r5, [sp, #172] +#else + strd r4, r5, [sp, #168] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [sp, #176] + str r7, [sp, #180] +#else + strd r6, r7, [sp, #176] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r8, [sp, #184] + str r9, [sp, #188] +#else + strd r8, r9, [sp, #184] +#endif # Start of loop processing a block L_SHA512_transform_len_begin: # Load, Reverse and Store W +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1] ldr lr, [r1, #4] - ldrd r4, r5, [r1, #8] - ldrd r6, r7, [r1, #16] - ldrd r8, r9, [r1, #24] +#else + ldrd r12, lr, [r1] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r1, #8] + ldr r5, [r1, #12] +#else + ldrd r4, r5, [r1, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #16] + ldr r7, [r1, #20] +#else + ldrd r6, r7, [r1, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #24] + ldr r9, [r1, #28] +#else + ldrd r8, r9, [r1, #24] +#endif rev r12, r12 rev lr, lr rev r4, r4 @@ -248,11 +343,30 @@ L_SHA512_transform_len_begin: str r6, [sp, #20] str r9, [sp, #24] str r8, [sp, #28] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1, #32] ldr lr, [r1, #36] - ldrd r4, r5, [r1, #40] - ldrd r6, r7, [r1, #48] - ldrd r8, r9, [r1, #56] +#else + ldrd r12, lr, [r1, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r1, #40] + ldr r5, [r1, #44] +#else + ldrd r4, r5, [r1, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #48] + ldr r7, [r1, #52] +#else + ldrd r6, r7, [r1, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #56] + ldr r9, [r1, #60] +#else + ldrd r8, r9, [r1, #56] +#endif rev r12, r12 rev lr, lr rev r4, r4 @@ -269,11 +383,30 @@ L_SHA512_transform_len_begin: str r6, [sp, #52] str r9, [sp, #56] str r8, [sp, #60] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1, #64] ldr lr, [r1, #68] - ldrd r4, r5, [r1, #72] - ldrd r6, r7, [r1, #80] - ldrd r8, r9, [r1, #88] +#else + ldrd r12, lr, [r1, #64] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r1, #72] + ldr r5, [r1, #76] +#else + ldrd r4, r5, [r1, #72] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #80] + ldr r7, [r1, #84] +#else + ldrd r6, r7, [r1, #80] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #88] + ldr r9, [r1, #92] +#else + ldrd r8, r9, [r1, #88] +#endif rev r12, r12 rev lr, lr rev r4, r4 @@ -290,11 +423,30 @@ L_SHA512_transform_len_begin: str r6, [sp, #84] str r9, [sp, #88] str r8, [sp, #92] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r1, #96] ldr lr, [r1, #100] - ldrd r4, r5, [r1, #104] - ldrd r6, r7, [r1, #112] - ldrd r8, r9, [r1, #120] +#else + ldrd r12, lr, [r1, #96] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r1, #104] + ldr r5, [r1, #108] +#else + ldrd r4, r5, [r1, #104] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r1, #112] + ldr r7, [r1, #116] +#else + ldrd r6, r7, [r1, #112] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r1, #120] + ldr r9, [r1, #124] +#else + ldrd r8, r9, [r1, #120] +#endif rev r12, r12 rev lr, lr rev r4, r4 @@ -312,17 +464,30 @@ L_SHA512_transform_len_begin: str r9, [sp, #120] str r8, [sp, #124] # Pre-calc: b ^ c - ldrd r8, r9, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [r0, #8] + ldr r9, [r0, #12] +#else + ldrd r8, r9, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] +#else + ldrd r12, lr, [r0, #16] +#endif eor r8, r8, r12 eor r9, r9, lr mov r10, #4 # Start of 16 rounds L_SHA512_transform_len_start: # Round 0 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] +#else + ldrd r12, lr, [r0, #32] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -337,42 +502,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] +#else + ldrd r12, lr, [r0, #56] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #56] str lr, [r0, #60] +#else + strd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] - ldrd r4, r5, [r0, #40] - ldrd r6, r7, [r0, #48] +#else + ldrd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] - ldrd r6, r7, [sp] +#else + ldrd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp] + ldr r7, [sp, #4] +#else + ldrd r6, r7, [sp] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3] + ldr r5, [r3, #4] +#else + ldrd r4, r5, [r3] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #56] str lr, [r0, #60] +#else + strd r12, lr, [r0, #56] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - strd r6, r7, [r0, #24] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #24] + str r7, [r0, #28] +#else + strd r6, r7, [r0, #24] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -387,31 +606,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] +#else + ldrd r12, lr, [r0, #56] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0] - ldrd r4, r5, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #56] str lr, [r0, #60] +#else + strd r12, lr, [r0, #56] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else + strd r4, r5, [r0, #56] +#endif mov r8, r6 mov r9, r7 # Calc new W[0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #112] ldr lr, [sp, #116] +#else + ldrd r12, lr, [sp, #112] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -427,17 +678,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp] ldr lr, [sp, #4] - ldrd r6, r7, [sp, #72] +#else + ldrd r12, lr, [sp] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #72] + ldr r7, [sp, #76] +#else + ldrd r6, r7, [sp, #72] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp] str lr, [sp, #4] +#else + strd r12, lr, [sp] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #8] ldr lr, [sp, #12] +#else + ldrd r12, lr, [sp, #8] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -453,15 +721,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp] ldr lr, [sp, #4] +#else + ldrd r12, lr, [sp] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp] str lr, [sp, #4] +#else + strd r12, lr, [sp] +#endif # Round 1 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] +#else + ldrd r12, lr, [r0, #24] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -476,42 +756,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] +#else + ldrd r12, lr, [r0, #48] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #48] str lr, [r0, #52] +#else + strd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] - ldrd r4, r5, [r0, #32] - ldrd r6, r7, [r0, #40] +#else + ldrd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] - ldrd r6, r7, [sp, #8] +#else + ldrd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #8] + ldr r7, [sp, #12] +#else + ldrd r6, r7, [sp, #8] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #8] + ldr r5, [r3, #12] +#else + ldrd r4, r5, [r3, #8] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #48] str lr, [r0, #52] +#else + strd r12, lr, [r0, #48] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] - strd r6, r7, [r0, #16] +#else + ldrd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #16] + str r7, [r0, #20] +#else + strd r6, r7, [r0, #16] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -526,31 +860,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] +#else + ldrd r12, lr, [r0, #48] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #56] - ldrd r4, r5, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #48] str lr, [r0, #52] +#else + strd r12, lr, [r0, #48] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else + strd r4, r5, [r0, #48] +#endif mov r8, r6 mov r9, r7 # Calc new W[1] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #120] ldr lr, [sp, #124] +#else + ldrd r12, lr, [sp, #120] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -566,17 +932,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #8] ldr lr, [sp, #12] - ldrd r6, r7, [sp, #80] +#else + ldrd r12, lr, [sp, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #80] + ldr r7, [sp, #84] +#else + ldrd r6, r7, [sp, #80] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #8] str lr, [sp, #12] +#else + strd r12, lr, [sp, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #16] ldr lr, [sp, #20] +#else + ldrd r12, lr, [sp, #16] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -592,15 +975,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #8] ldr lr, [sp, #12] +#else + ldrd r12, lr, [sp, #8] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #8] str lr, [sp, #12] +#else + strd r12, lr, [sp, #8] +#endif # Round 2 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] +#else + ldrd r12, lr, [r0, #16] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -615,42 +1010,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] +#else + ldrd r12, lr, [r0, #40] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #40] str lr, [r0, #44] +#else + strd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] - ldrd r4, r5, [r0, #24] - ldrd r6, r7, [r0, #32] +#else + ldrd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] - ldrd r6, r7, [sp, #16] +#else + ldrd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #16] + ldr r7, [sp, #20] +#else + ldrd r6, r7, [sp, #16] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #16] + ldr r5, [r3, #20] +#else + ldrd r4, r5, [r3, #16] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #40] str lr, [r0, #44] +#else + strd r12, lr, [r0, #40] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] - strd r6, r7, [r0, #8] +#else + ldrd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -665,31 +1114,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] +#else + ldrd r12, lr, [r0, #40] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #48] - ldrd r4, r5, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #40] str lr, [r0, #44] +#else + strd r12, lr, [r0, #40] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else + strd r4, r5, [r0, #40] +#endif mov r8, r6 mov r9, r7 # Calc new W[2] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp] ldr lr, [sp, #4] +#else + ldrd r12, lr, [sp] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -705,17 +1186,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #16] ldr lr, [sp, #20] - ldrd r6, r7, [sp, #88] +#else + ldrd r12, lr, [sp, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #88] + ldr r7, [sp, #92] +#else + ldrd r6, r7, [sp, #88] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #16] str lr, [sp, #20] +#else + strd r12, lr, [sp, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #24] ldr lr, [sp, #28] +#else + ldrd r12, lr, [sp, #24] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -731,15 +1229,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #16] ldr lr, [sp, #20] +#else + ldrd r12, lr, [sp, #16] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #16] str lr, [sp, #20] +#else + strd r12, lr, [sp, #16] +#endif # Round 3 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] +#else + ldrd r12, lr, [r0, #8] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -754,42 +1264,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] +#else + ldrd r12, lr, [r0, #32] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #32] str lr, [r0, #36] +#else + strd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] - ldrd r4, r5, [r0, #16] - ldrd r6, r7, [r0, #24] +#else + ldrd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] - ldrd r6, r7, [sp, #24] +#else + ldrd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #24] + ldr r7, [sp, #28] +#else + ldrd r6, r7, [sp, #24] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #24] + ldr r5, [r3, #28] +#else + ldrd r4, r5, [r3, #24] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #32] str lr, [r0, #36] +#else + strd r12, lr, [r0, #32] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] - strd r6, r7, [r0] +#else + ldrd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0] + str r7, [r0, #4] +#else + strd r6, r7, [r0] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -804,31 +1368,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] +#else + ldrd r12, lr, [r0, #32] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #40] - ldrd r4, r5, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #32] str lr, [r0, #36] +#else + strd r12, lr, [r0, #32] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else + strd r4, r5, [r0, #32] +#endif mov r8, r6 mov r9, r7 # Calc new W[3] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #8] ldr lr, [sp, #12] +#else + ldrd r12, lr, [sp, #8] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -844,17 +1440,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #24] ldr lr, [sp, #28] - ldrd r6, r7, [sp, #96] +#else + ldrd r12, lr, [sp, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #96] + ldr r7, [sp, #100] +#else + ldrd r6, r7, [sp, #96] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #24] str lr, [sp, #28] +#else + strd r12, lr, [sp, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #32] ldr lr, [sp, #36] +#else + ldrd r12, lr, [sp, #32] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -870,15 +1483,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #24] ldr lr, [sp, #28] +#else + ldrd r12, lr, [sp, #24] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #24] str lr, [sp, #28] +#else + strd r12, lr, [sp, #24] +#endif # Round 4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] +#else + ldrd r12, lr, [r0] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -893,42 +1518,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] +#else + ldrd r12, lr, [r0, #24] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #24] str lr, [r0, #28] +#else + strd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [r0, #16] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] - ldrd r6, r7, [sp, #32] +#else + ldrd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #32] + ldr r7, [sp, #36] +#else + ldrd r6, r7, [sp, #32] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #32] + ldr r5, [r3, #36] +#else + ldrd r4, r5, [r3, #32] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #24] str lr, [r0, #28] +#else + strd r12, lr, [r0, #24] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] - strd r6, r7, [r0, #56] +#else + ldrd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #56] + str r7, [r0, #60] +#else + strd r6, r7, [r0, #56] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -943,31 +1622,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] +#else + ldrd r12, lr, [r0, #24] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #32] - ldrd r4, r5, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #24] str lr, [r0, #28] +#else + strd r12, lr, [r0, #24] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif mov r8, r6 mov r9, r7 # Calc new W[4] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #16] ldr lr, [sp, #20] +#else + ldrd r12, lr, [sp, #16] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -983,17 +1694,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #32] ldr lr, [sp, #36] - ldrd r6, r7, [sp, #104] +#else + ldrd r12, lr, [sp, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #104] + ldr r7, [sp, #108] +#else + ldrd r6, r7, [sp, #104] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #32] str lr, [sp, #36] +#else + strd r12, lr, [sp, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #40] ldr lr, [sp, #44] +#else + ldrd r12, lr, [sp, #40] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -1009,15 +1737,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #32] ldr lr, [sp, #36] +#else + ldrd r12, lr, [sp, #32] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #32] str lr, [sp, #36] +#else + strd r12, lr, [sp, #32] +#endif # Round 5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] +#else + ldrd r12, lr, [r0, #56] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -1032,42 +1772,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] +#else + ldrd r12, lr, [r0, #16] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #16] str lr, [r0, #20] +#else + strd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] - ldrd r4, r5, [r0] - ldrd r6, r7, [r0, #8] +#else + ldrd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] - ldrd r6, r7, [sp, #40] +#else + ldrd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #40] + ldr r7, [sp, #44] +#else + ldrd r6, r7, [sp, #40] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #40] + ldr r5, [r3, #44] +#else + ldrd r4, r5, [r3, #40] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #16] str lr, [r0, #20] +#else + strd r12, lr, [r0, #16] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] - strd r6, r7, [r0, #48] +#else + ldrd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #48] + str r7, [r0, #52] +#else + strd r6, r7, [r0, #48] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -1082,31 +1876,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] +#else + ldrd r12, lr, [r0, #16] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #24] - ldrd r4, r5, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #16] str lr, [r0, #20] +#else + strd r12, lr, [r0, #16] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif mov r8, r6 mov r9, r7 # Calc new W[5] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #24] ldr lr, [sp, #28] +#else + ldrd r12, lr, [sp, #24] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -1122,17 +1948,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #40] ldr lr, [sp, #44] - ldrd r6, r7, [sp, #112] +#else + ldrd r12, lr, [sp, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #112] + ldr r7, [sp, #116] +#else + ldrd r6, r7, [sp, #112] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #40] str lr, [sp, #44] +#else + strd r12, lr, [sp, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #48] ldr lr, [sp, #52] +#else + ldrd r12, lr, [sp, #48] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -1148,15 +1991,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #40] ldr lr, [sp, #44] +#else + ldrd r12, lr, [sp, #40] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #40] str lr, [sp, #44] +#else + strd r12, lr, [sp, #40] +#endif # Round 6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] +#else + ldrd r12, lr, [r0, #48] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -1171,42 +2026,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] +#else + ldrd r12, lr, [r0, #8] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] +#else + strd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] - ldrd r4, r5, [r0, #56] - ldrd r6, r7, [r0] +#else + ldrd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] - ldrd r6, r7, [sp, #48] +#else + ldrd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #48] + ldr r7, [sp, #52] +#else + ldrd r6, r7, [sp, #48] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #48] + ldr r5, [r3, #52] +#else + ldrd r4, r5, [r3, #48] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] +#else + strd r12, lr, [r0, #8] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] - strd r6, r7, [r0, #40] +#else + ldrd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #40] + str r7, [r0, #44] +#else + strd r6, r7, [r0, #40] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -1221,31 +2130,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] +#else + ldrd r12, lr, [r0, #8] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #16] - ldrd r4, r5, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] +#else + strd r12, lr, [r0, #8] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif mov r8, r6 mov r9, r7 # Calc new W[6] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #32] ldr lr, [sp, #36] +#else + ldrd r12, lr, [sp, #32] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -1261,17 +2202,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #48] ldr lr, [sp, #52] - ldrd r6, r7, [sp, #120] +#else + ldrd r12, lr, [sp, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #120] + ldr r7, [sp, #124] +#else + ldrd r6, r7, [sp, #120] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #48] str lr, [sp, #52] +#else + strd r12, lr, [sp, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #56] ldr lr, [sp, #60] +#else + ldrd r12, lr, [sp, #56] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -1287,15 +2245,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #48] ldr lr, [sp, #52] +#else + ldrd r12, lr, [sp, #48] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #48] str lr, [sp, #52] +#else + strd r12, lr, [sp, #48] +#endif # Round 7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] +#else + ldrd r12, lr, [r0, #40] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -1310,42 +2280,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] +#else + ldrd r12, lr, [r0] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] +#else + strd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] - ldrd r4, r5, [r0, #48] - ldrd r6, r7, [r0, #56] +#else + ldrd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - ldrd r6, r7, [sp, #56] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #56] + ldr r7, [sp, #60] +#else + ldrd r6, r7, [sp, #56] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #56] + ldr r5, [r3, #60] +#else + ldrd r4, r5, [r3, #56] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] +#else + strd r12, lr, [r0] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] - strd r6, r7, [r0, #32] +#else + ldrd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #32] + str r7, [r0, #36] +#else + strd r6, r7, [r0, #32] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -1360,31 +2384,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] +#else + ldrd r12, lr, [r0] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #8] - ldrd r4, r5, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] +#else + strd r12, lr, [r0] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif mov r8, r6 mov r9, r7 # Calc new W[7] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #40] ldr lr, [sp, #44] +#else + ldrd r12, lr, [sp, #40] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -1400,17 +2456,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #56] ldr lr, [sp, #60] - ldrd r6, r7, [sp] +#else + ldrd r12, lr, [sp, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp] + ldr r7, [sp, #4] +#else + ldrd r6, r7, [sp] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #56] str lr, [sp, #60] +#else + strd r12, lr, [sp, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #64] ldr lr, [sp, #68] +#else + ldrd r12, lr, [sp, #64] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -1426,15 +2499,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #56] ldr lr, [sp, #60] +#else + ldrd r12, lr, [sp, #56] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #56] str lr, [sp, #60] +#else + strd r12, lr, [sp, #56] +#endif # Round 8 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] +#else + ldrd r12, lr, [r0, #32] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -1449,42 +2534,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] +#else + ldrd r12, lr, [r0, #56] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #56] str lr, [r0, #60] +#else + strd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] - ldrd r4, r5, [r0, #40] - ldrd r6, r7, [r0, #48] +#else + ldrd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] - ldrd r6, r7, [sp, #64] +#else + ldrd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #64] + ldr r7, [sp, #68] +#else + ldrd r6, r7, [sp, #64] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #64] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #64] + ldr r5, [r3, #68] +#else + ldrd r4, r5, [r3, #64] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #56] str lr, [r0, #60] +#else + strd r12, lr, [r0, #56] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - strd r6, r7, [r0, #24] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #24] + str r7, [r0, #28] +#else + strd r6, r7, [r0, #24] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -1499,31 +2638,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] +#else + ldrd r12, lr, [r0, #56] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0] - ldrd r4, r5, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #56] str lr, [r0, #60] +#else + strd r12, lr, [r0, #56] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else + strd r4, r5, [r0, #56] +#endif mov r8, r6 mov r9, r7 # Calc new W[8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #48] ldr lr, [sp, #52] +#else + ldrd r12, lr, [sp, #48] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -1539,17 +2710,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #64] ldr lr, [sp, #68] - ldrd r6, r7, [sp, #8] +#else + ldrd r12, lr, [sp, #64] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #8] + ldr r7, [sp, #12] +#else + ldrd r6, r7, [sp, #8] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #64] str lr, [sp, #68] +#else + strd r12, lr, [sp, #64] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #72] ldr lr, [sp, #76] +#else + ldrd r12, lr, [sp, #72] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -1565,15 +2753,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #64] ldr lr, [sp, #68] +#else + ldrd r12, lr, [sp, #64] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #64] str lr, [sp, #68] +#else + strd r12, lr, [sp, #64] +#endif # Round 9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] +#else + ldrd r12, lr, [r0, #24] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -1588,42 +2788,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] +#else + ldrd r12, lr, [r0, #48] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #48] str lr, [r0, #52] +#else + strd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] - ldrd r4, r5, [r0, #32] - ldrd r6, r7, [r0, #40] +#else + ldrd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] - ldrd r6, r7, [sp, #72] +#else + ldrd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #72] + ldr r7, [sp, #76] +#else + ldrd r6, r7, [sp, #72] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #72] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #72] + ldr r5, [r3, #76] +#else + ldrd r4, r5, [r3, #72] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #48] str lr, [r0, #52] +#else + strd r12, lr, [r0, #48] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] - strd r6, r7, [r0, #16] +#else + ldrd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #16] + str r7, [r0, #20] +#else + strd r6, r7, [r0, #16] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -1638,31 +2892,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] +#else + ldrd r12, lr, [r0, #48] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #56] - ldrd r4, r5, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #48] str lr, [r0, #52] +#else + strd r12, lr, [r0, #48] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else + strd r4, r5, [r0, #48] +#endif mov r8, r6 mov r9, r7 # Calc new W[9] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #56] ldr lr, [sp, #60] +#else + ldrd r12, lr, [sp, #56] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -1678,17 +2964,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #72] ldr lr, [sp, #76] - ldrd r6, r7, [sp, #16] +#else + ldrd r12, lr, [sp, #72] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #16] + ldr r7, [sp, #20] +#else + ldrd r6, r7, [sp, #16] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #72] str lr, [sp, #76] +#else + strd r12, lr, [sp, #72] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #80] ldr lr, [sp, #84] +#else + ldrd r12, lr, [sp, #80] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -1704,15 +3007,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #72] ldr lr, [sp, #76] +#else + ldrd r12, lr, [sp, #72] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #72] str lr, [sp, #76] +#else + strd r12, lr, [sp, #72] +#endif # Round 10 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] +#else + ldrd r12, lr, [r0, #16] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -1727,42 +3042,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] +#else + ldrd r12, lr, [r0, #40] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #40] str lr, [r0, #44] +#else + strd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] - ldrd r4, r5, [r0, #24] - ldrd r6, r7, [r0, #32] +#else + ldrd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] - ldrd r6, r7, [sp, #80] +#else + ldrd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #80] + ldr r7, [sp, #84] +#else + ldrd r6, r7, [sp, #80] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #80] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #80] + ldr r5, [r3, #84] +#else + ldrd r4, r5, [r3, #80] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #40] str lr, [r0, #44] +#else + strd r12, lr, [r0, #40] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] - strd r6, r7, [r0, #8] +#else + ldrd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -1777,31 +3146,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] +#else + ldrd r12, lr, [r0, #40] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #48] - ldrd r4, r5, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #40] str lr, [r0, #44] +#else + strd r12, lr, [r0, #40] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else + strd r4, r5, [r0, #40] +#endif mov r8, r6 mov r9, r7 # Calc new W[10] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #64] ldr lr, [sp, #68] +#else + ldrd r12, lr, [sp, #64] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -1817,17 +3218,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #80] ldr lr, [sp, #84] - ldrd r6, r7, [sp, #24] +#else + ldrd r12, lr, [sp, #80] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #24] + ldr r7, [sp, #28] +#else + ldrd r6, r7, [sp, #24] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #80] str lr, [sp, #84] +#else + strd r12, lr, [sp, #80] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #88] ldr lr, [sp, #92] +#else + ldrd r12, lr, [sp, #88] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -1843,15 +3261,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #80] ldr lr, [sp, #84] +#else + ldrd r12, lr, [sp, #80] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #80] str lr, [sp, #84] +#else + strd r12, lr, [sp, #80] +#endif # Round 11 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] +#else + ldrd r12, lr, [r0, #8] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -1866,42 +3296,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] +#else + ldrd r12, lr, [r0, #32] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #32] str lr, [r0, #36] +#else + strd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] - ldrd r4, r5, [r0, #16] - ldrd r6, r7, [r0, #24] +#else + ldrd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] - ldrd r6, r7, [sp, #88] +#else + ldrd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #88] + ldr r7, [sp, #92] +#else + ldrd r6, r7, [sp, #88] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #88] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #88] + ldr r5, [r3, #92] +#else + ldrd r4, r5, [r3, #88] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #32] str lr, [r0, #36] +#else + strd r12, lr, [r0, #32] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] - strd r6, r7, [r0] +#else + ldrd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0] + str r7, [r0, #4] +#else + strd r6, r7, [r0] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -1916,31 +3400,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] +#else + ldrd r12, lr, [r0, #32] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #40] - ldrd r4, r5, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #32] str lr, [r0, #36] +#else + strd r12, lr, [r0, #32] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else + strd r4, r5, [r0, #32] +#endif mov r8, r6 mov r9, r7 # Calc new W[11] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #72] ldr lr, [sp, #76] +#else + ldrd r12, lr, [sp, #72] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -1956,17 +3472,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #88] ldr lr, [sp, #92] - ldrd r6, r7, [sp, #32] +#else + ldrd r12, lr, [sp, #88] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #32] + ldr r7, [sp, #36] +#else + ldrd r6, r7, [sp, #32] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #88] str lr, [sp, #92] +#else + strd r12, lr, [sp, #88] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #96] ldr lr, [sp, #100] +#else + ldrd r12, lr, [sp, #96] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -1982,15 +3515,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #88] ldr lr, [sp, #92] +#else + ldrd r12, lr, [sp, #88] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #88] str lr, [sp, #92] +#else + strd r12, lr, [sp, #88] +#endif # Round 12 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] +#else + ldrd r12, lr, [r0] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -2005,42 +3550,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] +#else + ldrd r12, lr, [r0, #24] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #24] str lr, [r0, #28] +#else + strd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [r0, #16] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] - ldrd r6, r7, [sp, #96] +#else + ldrd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #96] + ldr r7, [sp, #100] +#else + ldrd r6, r7, [sp, #96] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #96] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #96] + ldr r5, [r3, #100] +#else + ldrd r4, r5, [r3, #96] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #24] str lr, [r0, #28] +#else + strd r12, lr, [r0, #24] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] - strd r6, r7, [r0, #56] +#else + ldrd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #56] + str r7, [r0, #60] +#else + strd r6, r7, [r0, #56] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -2055,31 +3654,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] +#else + ldrd r12, lr, [r0, #24] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #32] - ldrd r4, r5, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #24] str lr, [r0, #28] +#else + strd r12, lr, [r0, #24] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif mov r8, r6 mov r9, r7 # Calc new W[12] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #80] ldr lr, [sp, #84] +#else + ldrd r12, lr, [sp, #80] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -2095,17 +3726,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #96] ldr lr, [sp, #100] - ldrd r6, r7, [sp, #40] +#else + ldrd r12, lr, [sp, #96] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #40] + ldr r7, [sp, #44] +#else + ldrd r6, r7, [sp, #40] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #96] str lr, [sp, #100] +#else + strd r12, lr, [sp, #96] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #104] ldr lr, [sp, #108] +#else + ldrd r12, lr, [sp, #104] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -2121,15 +3769,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #96] ldr lr, [sp, #100] +#else + ldrd r12, lr, [sp, #96] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #96] str lr, [sp, #100] +#else + strd r12, lr, [sp, #96] +#endif # Round 13 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] +#else + ldrd r12, lr, [r0, #56] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -2144,42 +3804,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] +#else + ldrd r12, lr, [r0, #16] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #16] str lr, [r0, #20] +#else + strd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] - ldrd r4, r5, [r0] - ldrd r6, r7, [r0, #8] +#else + ldrd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] - ldrd r6, r7, [sp, #104] +#else + ldrd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #104] + ldr r7, [sp, #108] +#else + ldrd r6, r7, [sp, #104] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #104] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #104] + ldr r5, [r3, #108] +#else + ldrd r4, r5, [r3, #104] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #16] str lr, [r0, #20] +#else + strd r12, lr, [r0, #16] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] - strd r6, r7, [r0, #48] +#else + ldrd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #48] + str r7, [r0, #52] +#else + strd r6, r7, [r0, #48] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -2194,31 +3908,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] +#else + ldrd r12, lr, [r0, #16] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #24] - ldrd r4, r5, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #16] str lr, [r0, #20] +#else + strd r12, lr, [r0, #16] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif mov r8, r6 mov r9, r7 # Calc new W[13] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #88] ldr lr, [sp, #92] +#else + ldrd r12, lr, [sp, #88] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -2234,17 +3980,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #104] ldr lr, [sp, #108] - ldrd r6, r7, [sp, #48] +#else + ldrd r12, lr, [sp, #104] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #48] + ldr r7, [sp, #52] +#else + ldrd r6, r7, [sp, #48] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #104] str lr, [sp, #108] +#else + strd r12, lr, [sp, #104] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #112] ldr lr, [sp, #116] +#else + ldrd r12, lr, [sp, #112] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -2260,15 +4023,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #104] ldr lr, [sp, #108] +#else + ldrd r12, lr, [sp, #104] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #104] str lr, [sp, #108] +#else + strd r12, lr, [sp, #104] +#endif # Round 14 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] +#else + ldrd r12, lr, [r0, #48] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -2283,42 +4058,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] +#else + ldrd r12, lr, [r0, #8] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] +#else + strd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] - ldrd r4, r5, [r0, #56] - ldrd r6, r7, [r0] +#else + ldrd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] - ldrd r6, r7, [sp, #112] +#else + ldrd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #112] + ldr r7, [sp, #116] +#else + ldrd r6, r7, [sp, #112] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #112] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #112] + ldr r5, [r3, #116] +#else + ldrd r4, r5, [r3, #112] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] +#else + strd r12, lr, [r0, #8] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] - strd r6, r7, [r0, #40] +#else + ldrd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #40] + str r7, [r0, #44] +#else + strd r6, r7, [r0, #40] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -2333,31 +4162,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] +#else + ldrd r12, lr, [r0, #8] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #16] - ldrd r4, r5, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] +#else + strd r12, lr, [r0, #8] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif mov r8, r6 mov r9, r7 # Calc new W[14] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #96] ldr lr, [sp, #100] +#else + ldrd r12, lr, [sp, #96] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -2373,17 +4234,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #112] ldr lr, [sp, #116] - ldrd r6, r7, [sp, #56] +#else + ldrd r12, lr, [sp, #112] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #56] + ldr r7, [sp, #60] +#else + ldrd r6, r7, [sp, #56] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #112] str lr, [sp, #116] +#else + strd r12, lr, [sp, #112] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #120] ldr lr, [sp, #124] +#else + ldrd r12, lr, [sp, #120] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -2399,15 +4277,27 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #112] ldr lr, [sp, #116] +#else + ldrd r12, lr, [sp, #112] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #112] str lr, [sp, #116] +#else + strd r12, lr, [sp, #112] +#endif # Round 15 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] +#else + ldrd r12, lr, [r0, #40] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -2422,42 +4312,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] +#else + ldrd r12, lr, [r0] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] +#else + strd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] - ldrd r4, r5, [r0, #48] - ldrd r6, r7, [r0, #56] +#else + ldrd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - ldrd r6, r7, [sp, #120] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #120] + ldr r7, [sp, #124] +#else + ldrd r6, r7, [sp, #120] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #120] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #120] + ldr r5, [r3, #124] +#else + ldrd r4, r5, [r3, #120] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] +#else + strd r12, lr, [r0] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] - strd r6, r7, [r0, #32] +#else + ldrd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #32] + str r7, [r0, #36] +#else + strd r6, r7, [r0, #32] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -2472,31 +4416,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] +#else + ldrd r12, lr, [r0] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #8] - ldrd r4, r5, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] +#else + strd r12, lr, [r0] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif mov r8, r6 mov r9, r7 # Calc new W[15] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #104] ldr lr, [sp, #108] +#else + ldrd r12, lr, [sp, #104] +#endif lsrs r4, r12, #19 lsrs r5, lr, #19 orr r5, r5, r12, lsl #13 @@ -2512,17 +4488,34 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #26 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #120] ldr lr, [sp, #124] - ldrd r6, r7, [sp, #64] +#else + ldrd r12, lr, [sp, #120] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #64] + ldr r7, [sp, #68] +#else + ldrd r6, r7, [sp, #64] +#endif adds r12, r12, r4 adc lr, lr, r5 adds r12, r12, r6 adc lr, lr, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #120] str lr, [sp, #124] +#else + strd r12, lr, [sp, #120] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp] ldr lr, [sp, #4] +#else + ldrd r12, lr, [sp] +#endif lsrs r4, r12, #1 lsrs r5, lr, #1 orr r5, r5, r12, lsl #31 @@ -2538,18 +4531,30 @@ L_SHA512_transform_len_start: orr r6, r6, lr, lsl #25 eor r5, r5, r7 eor r4, r4, r6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [sp, #120] ldr lr, [sp, #124] +#else + ldrd r12, lr, [sp, #120] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #120] str lr, [sp, #124] +#else + strd r12, lr, [sp, #120] +#endif add r3, r3, #0x80 subs r10, r10, #1 bne L_SHA512_transform_len_start # Round 0 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] +#else + ldrd r12, lr, [r0, #32] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -2564,42 +4569,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] +#else + ldrd r12, lr, [r0, #56] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #56] str lr, [r0, #60] +#else + strd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] - ldrd r4, r5, [r0, #40] - ldrd r6, r7, [r0, #48] +#else + ldrd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] - ldrd r6, r7, [sp] +#else + ldrd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp] + ldr r7, [sp, #4] +#else + ldrd r6, r7, [sp] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3] + ldr r5, [r3, #4] +#else + ldrd r4, r5, [r3] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #56] str lr, [r0, #60] +#else + strd r12, lr, [r0, #56] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - strd r6, r7, [r0, #24] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #24] + str r7, [r0, #28] +#else + strd r6, r7, [r0, #24] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -2614,31 +4673,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] +#else + ldrd r12, lr, [r0, #56] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0] - ldrd r4, r5, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #56] str lr, [r0, #60] +#else + strd r12, lr, [r0, #56] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else + strd r4, r5, [r0, #56] +#endif mov r8, r6 mov r9, r7 # Round 1 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] +#else + ldrd r12, lr, [r0, #24] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -2653,42 +4744,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] +#else + ldrd r12, lr, [r0, #48] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #48] str lr, [r0, #52] +#else + strd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] - ldrd r4, r5, [r0, #32] - ldrd r6, r7, [r0, #40] +#else + ldrd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] - ldrd r6, r7, [sp, #8] +#else + ldrd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #8] + ldr r7, [sp, #12] +#else + ldrd r6, r7, [sp, #8] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #8] + ldr r5, [r3, #12] +#else + ldrd r4, r5, [r3, #8] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #48] str lr, [r0, #52] +#else + strd r12, lr, [r0, #48] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] - strd r6, r7, [r0, #16] +#else + ldrd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #16] + str r7, [r0, #20] +#else + strd r6, r7, [r0, #16] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -2703,31 +4848,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] +#else + ldrd r12, lr, [r0, #48] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #56] - ldrd r4, r5, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #48] str lr, [r0, #52] +#else + strd r12, lr, [r0, #48] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else + strd r4, r5, [r0, #48] +#endif mov r8, r6 mov r9, r7 # Round 2 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] +#else + ldrd r12, lr, [r0, #16] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -2742,42 +4919,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] +#else + ldrd r12, lr, [r0, #40] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #40] str lr, [r0, #44] +#else + strd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] - ldrd r4, r5, [r0, #24] - ldrd r6, r7, [r0, #32] +#else + ldrd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] - ldrd r6, r7, [sp, #16] +#else + ldrd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #16] + ldr r7, [sp, #20] +#else + ldrd r6, r7, [sp, #16] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #16] + ldr r5, [r3, #20] +#else + ldrd r4, r5, [r3, #16] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #40] str lr, [r0, #44] +#else + strd r12, lr, [r0, #40] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] - strd r6, r7, [r0, #8] +#else + ldrd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -2792,31 +5023,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] +#else + ldrd r12, lr, [r0, #40] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #48] - ldrd r4, r5, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #40] str lr, [r0, #44] +#else + strd r12, lr, [r0, #40] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else + strd r4, r5, [r0, #40] +#endif mov r8, r6 mov r9, r7 # Round 3 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] +#else + ldrd r12, lr, [r0, #8] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -2831,42 +5094,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] +#else + ldrd r12, lr, [r0, #32] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #32] str lr, [r0, #36] +#else + strd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] - ldrd r4, r5, [r0, #16] - ldrd r6, r7, [r0, #24] +#else + ldrd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] - ldrd r6, r7, [sp, #24] +#else + ldrd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #24] + ldr r7, [sp, #28] +#else + ldrd r6, r7, [sp, #24] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #24] + ldr r5, [r3, #28] +#else + ldrd r4, r5, [r3, #24] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #32] str lr, [r0, #36] +#else + strd r12, lr, [r0, #32] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] - strd r6, r7, [r0] +#else + ldrd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0] + str r7, [r0, #4] +#else + strd r6, r7, [r0] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -2881,31 +5198,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] +#else + ldrd r12, lr, [r0, #32] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #40] - ldrd r4, r5, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #32] str lr, [r0, #36] +#else + strd r12, lr, [r0, #32] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else + strd r4, r5, [r0, #32] +#endif mov r8, r6 mov r9, r7 # Round 4 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] +#else + ldrd r12, lr, [r0] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -2920,42 +5269,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] +#else + ldrd r12, lr, [r0, #24] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #24] str lr, [r0, #28] +#else + strd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [r0, #16] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] - ldrd r6, r7, [sp, #32] +#else + ldrd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #32] + ldr r7, [sp, #36] +#else + ldrd r6, r7, [sp, #32] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #32] + ldr r5, [r3, #36] +#else + ldrd r4, r5, [r3, #32] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #24] str lr, [r0, #28] +#else + strd r12, lr, [r0, #24] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] - strd r6, r7, [r0, #56] +#else + ldrd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #56] + str r7, [r0, #60] +#else + strd r6, r7, [r0, #56] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -2970,31 +5373,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] +#else + ldrd r12, lr, [r0, #24] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #32] - ldrd r4, r5, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #24] str lr, [r0, #28] +#else + strd r12, lr, [r0, #24] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif mov r8, r6 mov r9, r7 # Round 5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] +#else + ldrd r12, lr, [r0, #56] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -3009,42 +5444,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] +#else + ldrd r12, lr, [r0, #16] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #16] str lr, [r0, #20] +#else + strd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] - ldrd r4, r5, [r0] - ldrd r6, r7, [r0, #8] +#else + ldrd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] - ldrd r6, r7, [sp, #40] +#else + ldrd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #40] + ldr r7, [sp, #44] +#else + ldrd r6, r7, [sp, #40] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #40] + ldr r5, [r3, #44] +#else + ldrd r4, r5, [r3, #40] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #16] str lr, [r0, #20] +#else + strd r12, lr, [r0, #16] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] - strd r6, r7, [r0, #48] +#else + ldrd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #48] + str r7, [r0, #52] +#else + strd r6, r7, [r0, #48] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -3059,31 +5548,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] +#else + ldrd r12, lr, [r0, #16] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #24] - ldrd r4, r5, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #16] str lr, [r0, #20] +#else + strd r12, lr, [r0, #16] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif mov r8, r6 mov r9, r7 # Round 6 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] +#else + ldrd r12, lr, [r0, #48] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -3098,42 +5619,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] +#else + ldrd r12, lr, [r0, #8] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] +#else + strd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] - ldrd r4, r5, [r0, #56] - ldrd r6, r7, [r0] +#else + ldrd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] - ldrd r6, r7, [sp, #48] +#else + ldrd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #48] + ldr r7, [sp, #52] +#else + ldrd r6, r7, [sp, #48] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #48] + ldr r5, [r3, #52] +#else + ldrd r4, r5, [r3, #48] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] +#else + strd r12, lr, [r0, #8] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] - strd r6, r7, [r0, #40] +#else + ldrd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #40] + str r7, [r0, #44] +#else + strd r6, r7, [r0, #40] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -3148,31 +5723,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] +#else + ldrd r12, lr, [r0, #8] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #16] - ldrd r4, r5, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] +#else + strd r12, lr, [r0, #8] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif mov r8, r6 mov r9, r7 # Round 7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] +#else + ldrd r12, lr, [r0, #40] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -3187,42 +5794,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] +#else + ldrd r12, lr, [r0] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] +#else + strd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] - ldrd r4, r5, [r0, #48] - ldrd r6, r7, [r0, #56] +#else + ldrd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - ldrd r6, r7, [sp, #56] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #56] + ldr r7, [sp, #60] +#else + ldrd r6, r7, [sp, #56] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #56] + ldr r5, [r3, #60] +#else + ldrd r4, r5, [r3, #56] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] +#else + strd r12, lr, [r0] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] - strd r6, r7, [r0, #32] +#else + ldrd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #32] + str r7, [r0, #36] +#else + strd r6, r7, [r0, #32] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -3237,31 +5898,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] +#else + ldrd r12, lr, [r0] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #8] - ldrd r4, r5, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] +#else + strd r12, lr, [r0] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif mov r8, r6 mov r9, r7 # Round 8 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] +#else + ldrd r12, lr, [r0, #32] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -3276,42 +5969,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] +#else + ldrd r12, lr, [r0, #56] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #56] str lr, [r0, #60] +#else + strd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] - ldrd r4, r5, [r0, #40] - ldrd r6, r7, [r0, #48] +#else + ldrd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] - ldrd r6, r7, [sp, #64] +#else + ldrd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #64] + ldr r7, [sp, #68] +#else + ldrd r6, r7, [sp, #64] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #64] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #64] + ldr r5, [r3, #68] +#else + ldrd r4, r5, [r3, #64] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #56] str lr, [r0, #60] +#else + strd r12, lr, [r0, #56] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - strd r6, r7, [r0, #24] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #24] + str r7, [r0, #28] +#else + strd r6, r7, [r0, #24] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -3326,31 +6073,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] +#else + ldrd r12, lr, [r0, #56] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0] - ldrd r4, r5, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #56] str lr, [r0, #60] +#else + strd r12, lr, [r0, #56] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else + strd r4, r5, [r0, #56] +#endif mov r8, r6 mov r9, r7 # Round 9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] +#else + ldrd r12, lr, [r0, #24] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -3365,42 +6144,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] +#else + ldrd r12, lr, [r0, #48] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #48] str lr, [r0, #52] +#else + strd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] - ldrd r4, r5, [r0, #32] - ldrd r6, r7, [r0, #40] +#else + ldrd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] - ldrd r6, r7, [sp, #72] +#else + ldrd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #72] + ldr r7, [sp, #76] +#else + ldrd r6, r7, [sp, #72] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #72] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #72] + ldr r5, [r3, #76] +#else + ldrd r4, r5, [r3, #72] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #48] str lr, [r0, #52] +#else + strd r12, lr, [r0, #48] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] - strd r6, r7, [r0, #16] +#else + ldrd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #16] + str r7, [r0, #20] +#else + strd r6, r7, [r0, #16] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -3415,31 +6248,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] +#else + ldrd r12, lr, [r0, #48] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #56] - ldrd r4, r5, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #48] str lr, [r0, #52] +#else + strd r12, lr, [r0, #48] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #48] + str r5, [r0, #52] +#else + strd r4, r5, [r0, #48] +#endif mov r8, r6 mov r9, r7 # Round 10 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] +#else + ldrd r12, lr, [r0, #16] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -3454,42 +6319,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] +#else + ldrd r12, lr, [r0, #40] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #40] str lr, [r0, #44] +#else + strd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] - ldrd r4, r5, [r0, #24] - ldrd r6, r7, [r0, #32] +#else + ldrd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] - ldrd r6, r7, [sp, #80] +#else + ldrd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #80] + ldr r7, [sp, #84] +#else + ldrd r6, r7, [sp, #80] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #80] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #80] + ldr r5, [r3, #84] +#else + ldrd r4, r5, [r3, #80] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #40] str lr, [r0, #44] +#else + strd r12, lr, [r0, #40] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] - strd r6, r7, [r0, #8] +#else + ldrd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #8] + str r7, [r0, #12] +#else + strd r6, r7, [r0, #8] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -3504,31 +6423,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] +#else + ldrd r12, lr, [r0, #40] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #48] - ldrd r4, r5, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #40] str lr, [r0, #44] +#else + strd r12, lr, [r0, #40] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else + strd r4, r5, [r0, #40] +#endif mov r8, r6 mov r9, r7 # Round 11 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] +#else + ldrd r12, lr, [r0, #8] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -3543,42 +6494,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] +#else + ldrd r12, lr, [r0, #32] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #32] str lr, [r0, #36] +#else + strd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] - ldrd r4, r5, [r0, #16] - ldrd r6, r7, [r0, #24] +#else + ldrd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] - ldrd r6, r7, [sp, #88] +#else + ldrd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #88] + ldr r7, [sp, #92] +#else + ldrd r6, r7, [sp, #88] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #88] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #88] + ldr r5, [r3, #92] +#else + ldrd r4, r5, [r3, #88] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #32] str lr, [r0, #36] +#else + strd r12, lr, [r0, #32] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] - strd r6, r7, [r0] +#else + ldrd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0] + str r7, [r0, #4] +#else + strd r6, r7, [r0] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -3593,31 +6598,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] +#else + ldrd r12, lr, [r0, #32] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #40] - ldrd r4, r5, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #32] str lr, [r0, #36] +#else + strd r12, lr, [r0, #32] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #36] +#else + strd r4, r5, [r0, #32] +#endif mov r8, r6 mov r9, r7 # Round 12 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] +#else + ldrd r12, lr, [r0] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -3632,42 +6669,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] +#else + ldrd r12, lr, [r0, #24] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #24] str lr, [r0, #28] +#else + strd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [r0, #16] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] - ldrd r6, r7, [sp, #96] +#else + ldrd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #96] + ldr r7, [sp, #100] +#else + ldrd r6, r7, [sp, #96] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #96] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #96] + ldr r5, [r3, #100] +#else + ldrd r4, r5, [r3, #96] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #56] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #24] str lr, [r0, #28] +#else + strd r12, lr, [r0, #24] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] - strd r6, r7, [r0, #56] +#else + ldrd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #56] + str r7, [r0, #60] +#else + strd r6, r7, [r0, #56] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -3682,31 +6773,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] +#else + ldrd r12, lr, [r0, #24] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #32] - ldrd r4, r5, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #24] str lr, [r0, #28] +#else + strd r12, lr, [r0, #24] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif mov r8, r6 mov r9, r7 # Round 13 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] +#else + ldrd r12, lr, [r0, #56] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -3721,42 +6844,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] +#else + ldrd r12, lr, [r0, #16] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #16] str lr, [r0, #20] +#else + strd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #56] ldr lr, [r0, #60] - ldrd r4, r5, [r0] - ldrd r6, r7, [r0, #8] +#else + ldrd r12, lr, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] - ldrd r6, r7, [sp, #104] +#else + ldrd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #104] + ldr r7, [sp, #108] +#else + ldrd r6, r7, [sp, #104] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #104] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #104] + ldr r5, [r3, #108] +#else + ldrd r4, r5, [r3, #104] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #48] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #52] +#else + ldrd r6, r7, [r0, #48] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #16] str lr, [r0, #20] +#else + strd r12, lr, [r0, #16] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #24] ldr lr, [r0, #28] - strd r6, r7, [r0, #48] +#else + ldrd r12, lr, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #48] + str r7, [r0, #52] +#else + strd r6, r7, [r0, #48] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -3771,31 +6948,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] +#else + ldrd r12, lr, [r0, #16] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #24] - ldrd r4, r5, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #24] + ldr r7, [r0, #28] +#else + ldrd r6, r7, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #36] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #16] str lr, [r0, #20] +#else + strd r12, lr, [r0, #16] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #20] +#else + strd r4, r5, [r0, #16] +#endif mov r8, r6 mov r9, r7 # Round 14 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] +#else + ldrd r12, lr, [r0, #48] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -3810,42 +7019,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] +#else + ldrd r12, lr, [r0, #8] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] +#else + strd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] - ldrd r4, r5, [r0, #56] - ldrd r6, r7, [r0] +#else + ldrd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0] + ldr r7, [r0, #4] +#else + ldrd r6, r7, [r0] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] - ldrd r6, r7, [sp, #112] +#else + ldrd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #112] + ldr r7, [sp, #116] +#else + ldrd r6, r7, [sp, #112] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #112] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #112] + ldr r5, [r3, #116] +#else + ldrd r4, r5, [r3, #112] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #40] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #44] +#else + ldrd r6, r7, [r0, #40] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] +#else + strd r12, lr, [r0, #8] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] - strd r6, r7, [r0, #40] +#else + ldrd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #40] + str r7, [r0, #44] +#else + strd r6, r7, [r0, #40] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -3860,31 +7123,63 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] +#else + ldrd r12, lr, [r0, #8] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #16] - ldrd r4, r5, [r0, #24] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #16] + ldr r7, [r0, #20] +#else + ldrd r6, r7, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #8] str lr, [r0, #12] +#else + strd r12, lr, [r0, #8] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0, #8] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif mov r8, r6 mov r9, r7 # Round 15 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] +#else + ldrd r12, lr, [r0, #40] +#endif lsrs r4, r12, #14 lsrs r5, lr, #14 orr r5, r5, r12, lsl #18 @@ -3899,42 +7194,96 @@ L_SHA512_transform_len_start: lsls r7, lr, #23 orr r7, r7, r12, lsr #9 orr r6, r6, lr, lsr #9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] +#else + ldrd r12, lr, [r0] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] +#else + strd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #40] ldr lr, [r0, #44] - ldrd r4, r5, [r0, #48] - ldrd r6, r7, [r0, #56] +#else + ldrd r12, lr, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #52] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #60] +#else + ldrd r6, r7, [r0, #56] +#endif eor r4, r4, r6 eor r5, r5, r7 and r4, r4, r12 and r5, r5, lr eor r4, r4, r6 eor r5, r5, r7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - ldrd r6, r7, [sp, #120] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #120] + ldr r7, [sp, #124] +#else + ldrd r6, r7, [sp, #120] +#endif adds r12, r12, r4 adc lr, lr, r5 - ldrd r4, r5, [r3, #120] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r3, #120] + ldr r5, [r3, #124] +#else + ldrd r4, r5, [r3, #120] +#endif adds r12, r12, r6 adc lr, lr, r7 - ldrd r6, r7, [r0, #32] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #32] + ldr r7, [r0, #36] +#else + ldrd r6, r7, [r0, #32] +#endif adds r12, r12, r4 adc lr, lr, r5 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] +#else + strd r12, lr, [r0] +#endif adds r6, r6, r12 adc r7, r7, lr +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #8] ldr lr, [r0, #12] - strd r6, r7, [r0, #32] +#else + ldrd r12, lr, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r6, [r0, #32] + str r7, [r0, #36] +#else + strd r6, r7, [r0, #32] +#endif lsrs r4, r12, #28 lsrs r5, lr, #28 orr r5, r5, r12, lsl #4 @@ -3949,89 +7298,265 @@ L_SHA512_transform_len_start: lsls r7, lr, #25 orr r7, r7, r12, lsr #7 orr r6, r6, lr, lsr #7 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] +#else + ldrd r12, lr, [r0] +#endif eor r4, r4, r6 eor r5, r5, r7 adds r12, r12, r4 adc lr, lr, r5 - ldrd r6, r7, [r0, #8] - ldrd r4, r5, [r0, #16] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [r0, #8] + ldr r7, [r0, #12] +#else + ldrd r6, r7, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #20] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] +#else + strd r12, lr, [r0] +#endif eor r6, r6, r4 eor r7, r7, r5 and r8, r8, r6 and r9, r9, r7 eor r8, r8, r4 eor r9, r9, r5 - ldrd r4, r5, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #4] +#else + ldrd r4, r5, [r0] +#endif adds r4, r4, r8 adc r5, r5, r9 - strd r4, r5, [r0] +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #4] +#else + strd r4, r5, [r0] +#endif mov r8, r6 mov r9, r7 # Add in digest from start +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0] ldr lr, [r0, #4] - ldrd r4, r5, [r0, #8] - ldrd r6, r7, [sp, #128] - ldrd r8, r9, [sp, #136] +#else + ldrd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #12] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #128] + ldr r7, [sp, #132] +#else + ldrd r6, r7, [sp, #128] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [sp, #136] + ldr r9, [sp, #140] +#else + ldrd r8, r9, [sp, #136] +#endif adds r12, r12, r6 adc lr, lr, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0] str lr, [r0, #4] - strd r4, r5, [r0, #8] +#else + strd r12, lr, [r0] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #12] +#else + strd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #128] str lr, [sp, #132] - strd r4, r5, [sp, #136] +#else + strd r12, lr, [sp, #128] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #136] + str r5, [sp, #140] +#else + strd r4, r5, [sp, #136] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #16] ldr lr, [r0, #20] - ldrd r4, r5, [r0, #24] - ldrd r6, r7, [sp, #144] - ldrd r8, r9, [sp, #152] +#else + ldrd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #28] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #144] + ldr r7, [sp, #148] +#else + ldrd r6, r7, [sp, #144] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [sp, #152] + ldr r9, [sp, #156] +#else + ldrd r8, r9, [sp, #152] +#endif adds r12, r12, r6 adc lr, lr, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #16] str lr, [r0, #20] - strd r4, r5, [r0, #24] +#else + strd r12, lr, [r0, #16] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #28] +#else + strd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #144] str lr, [sp, #148] - strd r4, r5, [sp, #152] +#else + strd r12, lr, [sp, #144] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #152] + str r5, [sp, #156] +#else + strd r4, r5, [sp, #152] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #32] ldr lr, [r0, #36] - ldrd r4, r5, [r0, #40] - ldrd r6, r7, [sp, #160] - ldrd r8, r9, [sp, #168] +#else + ldrd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #44] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #160] + ldr r7, [sp, #164] +#else + ldrd r6, r7, [sp, #160] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [sp, #168] + ldr r9, [sp, #172] +#else + ldrd r8, r9, [sp, #168] +#endif adds r12, r12, r6 adc lr, lr, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #32] str lr, [r0, #36] - strd r4, r5, [r0, #40] +#else + strd r12, lr, [r0, #32] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #40] + str r5, [r0, #44] +#else + strd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #160] str lr, [sp, #164] - strd r4, r5, [sp, #168] +#else + strd r12, lr, [sp, #160] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #168] + str r5, [sp, #172] +#else + strd r4, r5, [sp, #168] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) ldr r12, [r0, #48] ldr lr, [r0, #52] - ldrd r4, r5, [r0, #56] - ldrd r6, r7, [sp, #176] - ldrd r8, r9, [sp, #184] +#else + ldrd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #60] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r6, [sp, #176] + ldr r7, [sp, #180] +#else + ldrd r6, r7, [sp, #176] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + ldr r8, [sp, #184] + ldr r9, [sp, #188] +#else + ldrd r8, r9, [sp, #184] +#endif adds r12, r12, r6 adc lr, lr, r7 adds r4, r4, r8 adc r5, r5, r9 +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [r0, #48] str lr, [r0, #52] - strd r4, r5, [r0, #56] +#else + strd r12, lr, [r0, #48] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [r0, #56] + str r5, [r0, #60] +#else + strd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) str r12, [sp, #176] str lr, [sp, #180] - strd r4, r5, [sp, #184] +#else + strd r12, lr, [sp, #176] +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + str r4, [sp, #184] + str r5, [sp, #188] +#else + strd r4, r5, [sp, #184] +#endif subs r2, r2, #0x80 sub r3, r3, #0x200 add r1, r1, #0x80 diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c index ef1b2b1d4..2ba4e96e6 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c @@ -131,30 +131,130 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "sub sp, sp, #0xc0\n\t" "mov r3, %[L_SHA512_transform_len_k]\n\t" /* Copy digest to add in at end */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[sha512], #24]\n\t" + "ldr r9, [%[sha512], #28]\n\t" +#else "ldrd r8, r9, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #128]\n\t" + "str lr, [sp, #132]\n\t" +#else "strd r12, lr, [sp, #128]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #136]\n\t" + "str r5, [sp, #140]\n\t" +#else "strd r4, r5, [sp, #136]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #144]\n\t" + "str r7, [sp, #148]\n\t" +#else "strd r6, r7, [sp, #144]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #152]\n\t" + "str r9, [sp, #156]\n\t" +#else "strd r8, r9, [sp, #152]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[sha512], #56]\n\t" + "ldr r9, [%[sha512], #60]\n\t" +#else "ldrd r8, r9, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #160]\n\t" + "str lr, [sp, #164]\n\t" +#else "strd r12, lr, [sp, #160]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #168]\n\t" + "str r5, [sp, #172]\n\t" +#else "strd r4, r5, [sp, #168]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [sp, #176]\n\t" + "str r7, [sp, #180]\n\t" +#else "strd r6, r7, [sp, #176]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [sp, #184]\n\t" + "str r9, [sp, #188]\n\t" +#else "strd r8, r9, [sp, #184]\n\t" +#endif /* Start of loop processing a block */ "\n" "L_SHA512_transform_len_begin_%=: \n\t" /* Load, Reverse and Store W */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[data]]\n\t" + "ldr lr, [%[data], #4]\n\t" +#else "ldrd r12, lr, [%[data]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[data], #8]\n\t" + "ldr r5, [%[data], #12]\n\t" +#else "ldrd r4, r5, [%[data], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[data], #16]\n\t" + "ldr r7, [%[data], #20]\n\t" +#else "ldrd r6, r7, [%[data], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[data], #24]\n\t" + "ldr r9, [%[data], #28]\n\t" +#else "ldrd r8, r9, [%[data], #24]\n\t" +#endif "rev r12, r12\n\t" "rev lr, lr\n\t" "rev r4, r4\n\t" @@ -171,10 +271,30 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "str r6, [sp, #20]\n\t" "str r9, [sp, #24]\n\t" "str r8, [sp, #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[data], #32]\n\t" + "ldr lr, [%[data], #36]\n\t" +#else "ldrd r12, lr, [%[data], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[data], #40]\n\t" + "ldr r5, [%[data], #44]\n\t" +#else "ldrd r4, r5, [%[data], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[data], #48]\n\t" + "ldr r7, [%[data], #52]\n\t" +#else "ldrd r6, r7, [%[data], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[data], #56]\n\t" + "ldr r9, [%[data], #60]\n\t" +#else "ldrd r8, r9, [%[data], #56]\n\t" +#endif "rev r12, r12\n\t" "rev lr, lr\n\t" "rev r4, r4\n\t" @@ -191,10 +311,30 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "str r6, [sp, #52]\n\t" "str r9, [sp, #56]\n\t" "str r8, [sp, #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[data], #64]\n\t" + "ldr lr, [%[data], #68]\n\t" +#else "ldrd r12, lr, [%[data], #64]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[data], #72]\n\t" + "ldr r5, [%[data], #76]\n\t" +#else "ldrd r4, r5, [%[data], #72]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[data], #80]\n\t" + "ldr r7, [%[data], #84]\n\t" +#else "ldrd r6, r7, [%[data], #80]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[data], #88]\n\t" + "ldr r9, [%[data], #92]\n\t" +#else "ldrd r8, r9, [%[data], #88]\n\t" +#endif "rev r12, r12\n\t" "rev lr, lr\n\t" "rev r4, r4\n\t" @@ -211,10 +351,30 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "str r6, [sp, #84]\n\t" "str r9, [sp, #88]\n\t" "str r8, [sp, #92]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[data], #96]\n\t" + "ldr lr, [%[data], #100]\n\t" +#else "ldrd r12, lr, [%[data], #96]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[data], #104]\n\t" + "ldr r5, [%[data], #108]\n\t" +#else "ldrd r4, r5, [%[data], #104]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[data], #112]\n\t" + "ldr r7, [%[data], #116]\n\t" +#else "ldrd r6, r7, [%[data], #112]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[data], #120]\n\t" + "ldr r9, [%[data], #124]\n\t" +#else "ldrd r8, r9, [%[data], #120]\n\t" +#endif "rev r12, r12\n\t" "rev lr, lr\n\t" "rev r4, r4\n\t" @@ -232,8 +392,18 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "str r9, [sp, #120]\n\t" "str r8, [sp, #124]\n\t" /* Pre-calc: b ^ c */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[sha512], #8]\n\t" + "ldr r9, [%[sha512], #12]\n\t" +#else "ldrd r8, r9, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif "eor r8, r8, r12\n\t" "eor r9, r9, lr\n\t" "mov r10, #4\n\t" @@ -241,7 +411,12 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "\n" "L_SHA512_transform_len_start_%=: \n\t" /* Round 0 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -256,36 +431,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #56]\n\t" + "str lr, [%[sha512], #60]\n\t" +#else "strd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp]\n\t" + "ldr r7, [sp, #4]\n\t" +#else "ldrd r6, r7, [sp]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3]\n\t" + "ldr r5, [r3, #4]\n\t" +#else "ldrd r4, r5, [r3]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #56]\n\t" + "str lr, [%[sha512], #60]\n\t" +#else "strd r12, lr, [%[sha512], #56]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #24]\n\t" + "str r7, [%[sha512], #28]\n\t" +#else "strd r6, r7, [%[sha512], #24]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -300,28 +535,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else "ldrd r6, r7, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #56]\n\t" + "str lr, [%[sha512], #60]\n\t" +#else "strd r12, lr, [%[sha512], #56]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else "strd r4, r5, [%[sha512], #56]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[0] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #112]\n\t" + "ldr lr, [sp, #116]\n\t" +#else "ldrd r12, lr, [sp, #112]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -337,14 +607,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp]\n\t" + "ldr lr, [sp, #4]\n\t" +#else "ldrd r12, lr, [sp]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #72]\n\t" + "ldr r7, [sp, #76]\n\t" +#else "ldrd r6, r7, [sp, #72]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp]\n\t" + "str lr, [sp, #4]\n\t" +#else "strd r12, lr, [sp]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #8]\n\t" + "ldr lr, [sp, #12]\n\t" +#else "ldrd r12, lr, [sp, #8]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -360,12 +650,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp]\n\t" + "ldr lr, [sp, #4]\n\t" +#else "ldrd r12, lr, [sp]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp]\n\t" + "str lr, [sp, #4]\n\t" +#else "strd r12, lr, [sp]\n\t" +#endif /* Round 1 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -380,36 +685,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #48]\n\t" + "str lr, [%[sha512], #52]\n\t" +#else "strd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #8]\n\t" + "ldr r7, [sp, #12]\n\t" +#else "ldrd r6, r7, [sp, #8]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #8]\n\t" + "ldr r5, [r3, #12]\n\t" +#else "ldrd r4, r5, [r3, #8]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #48]\n\t" + "str lr, [%[sha512], #52]\n\t" +#else "strd r12, lr, [%[sha512], #48]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #16]\n\t" + "str r7, [%[sha512], #20]\n\t" +#else "strd r6, r7, [%[sha512], #16]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -424,28 +789,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #48]\n\t" + "str lr, [%[sha512], #52]\n\t" +#else "strd r12, lr, [%[sha512], #48]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else "strd r4, r5, [%[sha512], #48]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[1] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #120]\n\t" + "ldr lr, [sp, #124]\n\t" +#else "ldrd r12, lr, [sp, #120]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -461,14 +861,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #8]\n\t" + "ldr lr, [sp, #12]\n\t" +#else "ldrd r12, lr, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #80]\n\t" + "ldr r7, [sp, #84]\n\t" +#else "ldrd r6, r7, [sp, #80]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #8]\n\t" + "str lr, [sp, #12]\n\t" +#else "strd r12, lr, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #16]\n\t" + "ldr lr, [sp, #20]\n\t" +#else "ldrd r12, lr, [sp, #16]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -484,12 +904,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #8]\n\t" + "ldr lr, [sp, #12]\n\t" +#else "ldrd r12, lr, [sp, #8]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #8]\n\t" + "str lr, [sp, #12]\n\t" +#else "strd r12, lr, [sp, #8]\n\t" +#endif /* Round 2 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -504,36 +939,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #40]\n\t" + "str lr, [%[sha512], #44]\n\t" +#else "strd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #16]\n\t" + "ldr r7, [sp, #20]\n\t" +#else "ldrd r6, r7, [sp, #16]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #16]\n\t" + "ldr r5, [r3, #20]\n\t" +#else "ldrd r4, r5, [r3, #16]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #40]\n\t" + "str lr, [%[sha512], #44]\n\t" +#else "strd r12, lr, [%[sha512], #40]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #8]\n\t" + "str r7, [%[sha512], #12]\n\t" +#else "strd r6, r7, [%[sha512], #8]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -548,28 +1043,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #40]\n\t" + "str lr, [%[sha512], #44]\n\t" +#else "strd r12, lr, [%[sha512], #40]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else "strd r4, r5, [%[sha512], #40]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[2] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp]\n\t" + "ldr lr, [sp, #4]\n\t" +#else "ldrd r12, lr, [sp]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -585,14 +1115,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #16]\n\t" + "ldr lr, [sp, #20]\n\t" +#else "ldrd r12, lr, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #88]\n\t" + "ldr r7, [sp, #92]\n\t" +#else "ldrd r6, r7, [sp, #88]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #16]\n\t" + "str lr, [sp, #20]\n\t" +#else "strd r12, lr, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #24]\n\t" + "ldr lr, [sp, #28]\n\t" +#else "ldrd r12, lr, [sp, #24]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -608,12 +1158,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #16]\n\t" + "ldr lr, [sp, #20]\n\t" +#else "ldrd r12, lr, [sp, #16]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #16]\n\t" + "str lr, [sp, #20]\n\t" +#else "strd r12, lr, [sp, #16]\n\t" +#endif /* Round 3 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -628,36 +1193,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #32]\n\t" + "str lr, [%[sha512], #36]\n\t" +#else "strd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #24]\n\t" + "ldr r7, [sp, #28]\n\t" +#else "ldrd r6, r7, [sp, #24]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #24]\n\t" + "ldr r5, [r3, #28]\n\t" +#else "ldrd r4, r5, [r3, #24]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else "ldrd r6, r7, [%[sha512]]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #32]\n\t" + "str lr, [%[sha512], #36]\n\t" +#else "strd r12, lr, [%[sha512], #32]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512]]\n\t" + "str r7, [%[sha512], #4]\n\t" +#else "strd r6, r7, [%[sha512]]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -672,28 +1297,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #32]\n\t" + "str lr, [%[sha512], #36]\n\t" +#else "strd r12, lr, [%[sha512], #32]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else "strd r4, r5, [%[sha512], #32]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[3] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #8]\n\t" + "ldr lr, [sp, #12]\n\t" +#else "ldrd r12, lr, [sp, #8]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -709,14 +1369,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #24]\n\t" + "ldr lr, [sp, #28]\n\t" +#else "ldrd r12, lr, [sp, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #96]\n\t" + "ldr r7, [sp, #100]\n\t" +#else "ldrd r6, r7, [sp, #96]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #24]\n\t" + "str lr, [sp, #28]\n\t" +#else "strd r12, lr, [sp, #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #32]\n\t" + "ldr lr, [sp, #36]\n\t" +#else "ldrd r12, lr, [sp, #32]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -732,12 +1412,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #24]\n\t" + "ldr lr, [sp, #28]\n\t" +#else "ldrd r12, lr, [sp, #24]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #24]\n\t" + "str lr, [sp, #28]\n\t" +#else "strd r12, lr, [sp, #24]\n\t" +#endif /* Round 4 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -752,36 +1447,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #24]\n\t" + "str lr, [%[sha512], #28]\n\t" +#else "strd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #32]\n\t" + "ldr r7, [sp, #36]\n\t" +#else "ldrd r6, r7, [sp, #32]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #32]\n\t" + "ldr r5, [r3, #36]\n\t" +#else "ldrd r4, r5, [r3, #32]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #24]\n\t" + "str lr, [%[sha512], #28]\n\t" +#else "strd r12, lr, [%[sha512], #24]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #56]\n\t" + "str r7, [%[sha512], #60]\n\t" +#else "strd r6, r7, [%[sha512], #56]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -796,28 +1551,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #24]\n\t" + "str lr, [%[sha512], #28]\n\t" +#else "strd r12, lr, [%[sha512], #24]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else "strd r4, r5, [%[sha512], #24]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[4] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #16]\n\t" + "ldr lr, [sp, #20]\n\t" +#else "ldrd r12, lr, [sp, #16]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -833,14 +1623,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #32]\n\t" + "ldr lr, [sp, #36]\n\t" +#else "ldrd r12, lr, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #104]\n\t" + "ldr r7, [sp, #108]\n\t" +#else "ldrd r6, r7, [sp, #104]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #32]\n\t" + "str lr, [sp, #36]\n\t" +#else "strd r12, lr, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #40]\n\t" + "ldr lr, [sp, #44]\n\t" +#else "ldrd r12, lr, [sp, #40]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -856,12 +1666,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #32]\n\t" + "ldr lr, [sp, #36]\n\t" +#else "ldrd r12, lr, [sp, #32]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #32]\n\t" + "str lr, [sp, #36]\n\t" +#else "strd r12, lr, [sp, #32]\n\t" +#endif /* Round 5 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -876,36 +1701,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #16]\n\t" + "str lr, [%[sha512], #20]\n\t" +#else "strd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #40]\n\t" + "ldr r7, [sp, #44]\n\t" +#else "ldrd r6, r7, [sp, #40]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #40]\n\t" + "ldr r5, [r3, #44]\n\t" +#else "ldrd r4, r5, [r3, #40]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #16]\n\t" + "str lr, [%[sha512], #20]\n\t" +#else "strd r12, lr, [%[sha512], #16]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #48]\n\t" + "str r7, [%[sha512], #52]\n\t" +#else "strd r6, r7, [%[sha512], #48]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -920,28 +1805,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #16]\n\t" + "str lr, [%[sha512], #20]\n\t" +#else "strd r12, lr, [%[sha512], #16]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else "strd r4, r5, [%[sha512], #16]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[5] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #24]\n\t" + "ldr lr, [sp, #28]\n\t" +#else "ldrd r12, lr, [sp, #24]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -957,14 +1877,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #40]\n\t" + "ldr lr, [sp, #44]\n\t" +#else "ldrd r12, lr, [sp, #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #112]\n\t" + "ldr r7, [sp, #116]\n\t" +#else "ldrd r6, r7, [sp, #112]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #40]\n\t" + "str lr, [sp, #44]\n\t" +#else "strd r12, lr, [sp, #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #48]\n\t" + "ldr lr, [sp, #52]\n\t" +#else "ldrd r12, lr, [sp, #48]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -980,12 +1920,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #40]\n\t" + "ldr lr, [sp, #44]\n\t" +#else "ldrd r12, lr, [sp, #40]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #40]\n\t" + "str lr, [sp, #44]\n\t" +#else "strd r12, lr, [sp, #40]\n\t" +#endif /* Round 6 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -1000,36 +1955,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #8]\n\t" + "str lr, [%[sha512], #12]\n\t" +#else "strd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else "ldrd r6, r7, [%[sha512]]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #48]\n\t" + "ldr r7, [sp, #52]\n\t" +#else "ldrd r6, r7, [sp, #48]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #48]\n\t" + "ldr r5, [r3, #52]\n\t" +#else "ldrd r4, r5, [r3, #48]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #8]\n\t" + "str lr, [%[sha512], #12]\n\t" +#else "strd r12, lr, [%[sha512], #8]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #40]\n\t" + "str r7, [%[sha512], #44]\n\t" +#else "strd r6, r7, [%[sha512], #40]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -1044,28 +2059,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #8]\n\t" + "str lr, [%[sha512], #12]\n\t" +#else "strd r12, lr, [%[sha512], #8]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else "strd r4, r5, [%[sha512], #8]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[6] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #32]\n\t" + "ldr lr, [sp, #36]\n\t" +#else "ldrd r12, lr, [sp, #32]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -1081,14 +2131,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #48]\n\t" + "ldr lr, [sp, #52]\n\t" +#else "ldrd r12, lr, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #120]\n\t" + "ldr r7, [sp, #124]\n\t" +#else "ldrd r6, r7, [sp, #120]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #48]\n\t" + "str lr, [sp, #52]\n\t" +#else "strd r12, lr, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #56]\n\t" + "ldr lr, [sp, #60]\n\t" +#else "ldrd r12, lr, [sp, #56]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -1104,12 +2174,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #48]\n\t" + "ldr lr, [sp, #52]\n\t" +#else "ldrd r12, lr, [sp, #48]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #48]\n\t" + "str lr, [sp, #52]\n\t" +#else "strd r12, lr, [sp, #48]\n\t" +#endif /* Round 7 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -1124,36 +2209,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512]]\n\t" + "str lr, [%[sha512], #4]\n\t" +#else "strd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #56]\n\t" + "ldr r7, [sp, #60]\n\t" +#else "ldrd r6, r7, [sp, #56]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #56]\n\t" + "ldr r5, [r3, #60]\n\t" +#else "ldrd r4, r5, [r3, #56]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512]]\n\t" + "str lr, [%[sha512], #4]\n\t" +#else "strd r12, lr, [%[sha512]]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #32]\n\t" + "str r7, [%[sha512], #36]\n\t" +#else "strd r6, r7, [%[sha512], #32]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -1168,28 +2313,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512]]\n\t" + "str lr, [%[sha512], #4]\n\t" +#else "strd r12, lr, [%[sha512]]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else "ldrd r4, r5, [%[sha512]]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else "strd r4, r5, [%[sha512]]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[7] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #40]\n\t" + "ldr lr, [sp, #44]\n\t" +#else "ldrd r12, lr, [sp, #40]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -1205,14 +2385,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #56]\n\t" + "ldr lr, [sp, #60]\n\t" +#else "ldrd r12, lr, [sp, #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp]\n\t" + "ldr r7, [sp, #4]\n\t" +#else "ldrd r6, r7, [sp]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #56]\n\t" + "str lr, [sp, #60]\n\t" +#else "strd r12, lr, [sp, #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #64]\n\t" + "ldr lr, [sp, #68]\n\t" +#else "ldrd r12, lr, [sp, #64]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -1228,12 +2428,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #56]\n\t" + "ldr lr, [sp, #60]\n\t" +#else "ldrd r12, lr, [sp, #56]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #56]\n\t" + "str lr, [sp, #60]\n\t" +#else "strd r12, lr, [sp, #56]\n\t" +#endif /* Round 8 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -1248,36 +2463,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #56]\n\t" + "str lr, [%[sha512], #60]\n\t" +#else "strd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #64]\n\t" + "ldr r7, [sp, #68]\n\t" +#else "ldrd r6, r7, [sp, #64]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #64]\n\t" + "ldr r5, [r3, #68]\n\t" +#else "ldrd r4, r5, [r3, #64]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #56]\n\t" + "str lr, [%[sha512], #60]\n\t" +#else "strd r12, lr, [%[sha512], #56]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #24]\n\t" + "str r7, [%[sha512], #28]\n\t" +#else "strd r6, r7, [%[sha512], #24]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -1292,28 +2567,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else "ldrd r6, r7, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #56]\n\t" + "str lr, [%[sha512], #60]\n\t" +#else "strd r12, lr, [%[sha512], #56]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else "strd r4, r5, [%[sha512], #56]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[8] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #48]\n\t" + "ldr lr, [sp, #52]\n\t" +#else "ldrd r12, lr, [sp, #48]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -1329,14 +2639,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #64]\n\t" + "ldr lr, [sp, #68]\n\t" +#else "ldrd r12, lr, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #8]\n\t" + "ldr r7, [sp, #12]\n\t" +#else "ldrd r6, r7, [sp, #8]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #64]\n\t" + "str lr, [sp, #68]\n\t" +#else "strd r12, lr, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #72]\n\t" + "ldr lr, [sp, #76]\n\t" +#else "ldrd r12, lr, [sp, #72]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -1352,12 +2682,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #64]\n\t" + "ldr lr, [sp, #68]\n\t" +#else "ldrd r12, lr, [sp, #64]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #64]\n\t" + "str lr, [sp, #68]\n\t" +#else "strd r12, lr, [sp, #64]\n\t" +#endif /* Round 9 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -1372,36 +2717,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #48]\n\t" + "str lr, [%[sha512], #52]\n\t" +#else "strd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #72]\n\t" + "ldr r7, [sp, #76]\n\t" +#else "ldrd r6, r7, [sp, #72]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #72]\n\t" + "ldr r5, [r3, #76]\n\t" +#else "ldrd r4, r5, [r3, #72]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #48]\n\t" + "str lr, [%[sha512], #52]\n\t" +#else "strd r12, lr, [%[sha512], #48]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #16]\n\t" + "str r7, [%[sha512], #20]\n\t" +#else "strd r6, r7, [%[sha512], #16]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -1416,28 +2821,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #48]\n\t" + "str lr, [%[sha512], #52]\n\t" +#else "strd r12, lr, [%[sha512], #48]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else "strd r4, r5, [%[sha512], #48]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[9] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #56]\n\t" + "ldr lr, [sp, #60]\n\t" +#else "ldrd r12, lr, [sp, #56]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -1453,14 +2893,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #72]\n\t" + "ldr lr, [sp, #76]\n\t" +#else "ldrd r12, lr, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #16]\n\t" + "ldr r7, [sp, #20]\n\t" +#else "ldrd r6, r7, [sp, #16]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #72]\n\t" + "str lr, [sp, #76]\n\t" +#else "strd r12, lr, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #80]\n\t" + "ldr lr, [sp, #84]\n\t" +#else "ldrd r12, lr, [sp, #80]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -1476,12 +2936,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #72]\n\t" + "ldr lr, [sp, #76]\n\t" +#else "ldrd r12, lr, [sp, #72]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #72]\n\t" + "str lr, [sp, #76]\n\t" +#else "strd r12, lr, [sp, #72]\n\t" +#endif /* Round 10 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -1496,36 +2971,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #40]\n\t" + "str lr, [%[sha512], #44]\n\t" +#else "strd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #80]\n\t" + "ldr r7, [sp, #84]\n\t" +#else "ldrd r6, r7, [sp, #80]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #80]\n\t" + "ldr r5, [r3, #84]\n\t" +#else "ldrd r4, r5, [r3, #80]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #40]\n\t" + "str lr, [%[sha512], #44]\n\t" +#else "strd r12, lr, [%[sha512], #40]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #8]\n\t" + "str r7, [%[sha512], #12]\n\t" +#else "strd r6, r7, [%[sha512], #8]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -1540,28 +3075,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #40]\n\t" + "str lr, [%[sha512], #44]\n\t" +#else "strd r12, lr, [%[sha512], #40]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else "strd r4, r5, [%[sha512], #40]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[10] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #64]\n\t" + "ldr lr, [sp, #68]\n\t" +#else "ldrd r12, lr, [sp, #64]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -1577,14 +3147,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #80]\n\t" + "ldr lr, [sp, #84]\n\t" +#else "ldrd r12, lr, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #24]\n\t" + "ldr r7, [sp, #28]\n\t" +#else "ldrd r6, r7, [sp, #24]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #80]\n\t" + "str lr, [sp, #84]\n\t" +#else "strd r12, lr, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #88]\n\t" + "ldr lr, [sp, #92]\n\t" +#else "ldrd r12, lr, [sp, #88]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -1600,12 +3190,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #80]\n\t" + "ldr lr, [sp, #84]\n\t" +#else "ldrd r12, lr, [sp, #80]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #80]\n\t" + "str lr, [sp, #84]\n\t" +#else "strd r12, lr, [sp, #80]\n\t" +#endif /* Round 11 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -1620,36 +3225,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #32]\n\t" + "str lr, [%[sha512], #36]\n\t" +#else "strd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #88]\n\t" + "ldr r7, [sp, #92]\n\t" +#else "ldrd r6, r7, [sp, #88]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #88]\n\t" + "ldr r5, [r3, #92]\n\t" +#else "ldrd r4, r5, [r3, #88]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else "ldrd r6, r7, [%[sha512]]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #32]\n\t" + "str lr, [%[sha512], #36]\n\t" +#else "strd r12, lr, [%[sha512], #32]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512]]\n\t" + "str r7, [%[sha512], #4]\n\t" +#else "strd r6, r7, [%[sha512]]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -1664,28 +3329,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #32]\n\t" + "str lr, [%[sha512], #36]\n\t" +#else "strd r12, lr, [%[sha512], #32]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else "strd r4, r5, [%[sha512], #32]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[11] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #72]\n\t" + "ldr lr, [sp, #76]\n\t" +#else "ldrd r12, lr, [sp, #72]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -1701,14 +3401,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #88]\n\t" + "ldr lr, [sp, #92]\n\t" +#else "ldrd r12, lr, [sp, #88]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #32]\n\t" + "ldr r7, [sp, #36]\n\t" +#else "ldrd r6, r7, [sp, #32]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #88]\n\t" + "str lr, [sp, #92]\n\t" +#else "strd r12, lr, [sp, #88]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #96]\n\t" + "ldr lr, [sp, #100]\n\t" +#else "ldrd r12, lr, [sp, #96]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -1724,12 +3444,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #88]\n\t" + "ldr lr, [sp, #92]\n\t" +#else "ldrd r12, lr, [sp, #88]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #88]\n\t" + "str lr, [sp, #92]\n\t" +#else "strd r12, lr, [sp, #88]\n\t" +#endif /* Round 12 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -1744,36 +3479,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #24]\n\t" + "str lr, [%[sha512], #28]\n\t" +#else "strd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #96]\n\t" + "ldr r7, [sp, #100]\n\t" +#else "ldrd r6, r7, [sp, #96]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #96]\n\t" + "ldr r5, [r3, #100]\n\t" +#else "ldrd r4, r5, [r3, #96]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #24]\n\t" + "str lr, [%[sha512], #28]\n\t" +#else "strd r12, lr, [%[sha512], #24]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #56]\n\t" + "str r7, [%[sha512], #60]\n\t" +#else "strd r6, r7, [%[sha512], #56]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -1788,28 +3583,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #24]\n\t" + "str lr, [%[sha512], #28]\n\t" +#else "strd r12, lr, [%[sha512], #24]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else "strd r4, r5, [%[sha512], #24]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[12] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #80]\n\t" + "ldr lr, [sp, #84]\n\t" +#else "ldrd r12, lr, [sp, #80]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -1825,14 +3655,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #96]\n\t" + "ldr lr, [sp, #100]\n\t" +#else "ldrd r12, lr, [sp, #96]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #40]\n\t" + "ldr r7, [sp, #44]\n\t" +#else "ldrd r6, r7, [sp, #40]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #96]\n\t" + "str lr, [sp, #100]\n\t" +#else "strd r12, lr, [sp, #96]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #104]\n\t" + "ldr lr, [sp, #108]\n\t" +#else "ldrd r12, lr, [sp, #104]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -1848,12 +3698,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #96]\n\t" + "ldr lr, [sp, #100]\n\t" +#else "ldrd r12, lr, [sp, #96]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #96]\n\t" + "str lr, [sp, #100]\n\t" +#else "strd r12, lr, [sp, #96]\n\t" +#endif /* Round 13 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -1868,36 +3733,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #16]\n\t" + "str lr, [%[sha512], #20]\n\t" +#else "strd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #104]\n\t" + "ldr r7, [sp, #108]\n\t" +#else "ldrd r6, r7, [sp, #104]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #104]\n\t" + "ldr r5, [r3, #108]\n\t" +#else "ldrd r4, r5, [r3, #104]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #16]\n\t" + "str lr, [%[sha512], #20]\n\t" +#else "strd r12, lr, [%[sha512], #16]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #48]\n\t" + "str r7, [%[sha512], #52]\n\t" +#else "strd r6, r7, [%[sha512], #48]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -1912,28 +3837,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #16]\n\t" + "str lr, [%[sha512], #20]\n\t" +#else "strd r12, lr, [%[sha512], #16]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else "strd r4, r5, [%[sha512], #16]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[13] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #88]\n\t" + "ldr lr, [sp, #92]\n\t" +#else "ldrd r12, lr, [sp, #88]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -1949,14 +3909,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #104]\n\t" + "ldr lr, [sp, #108]\n\t" +#else "ldrd r12, lr, [sp, #104]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #48]\n\t" + "ldr r7, [sp, #52]\n\t" +#else "ldrd r6, r7, [sp, #48]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #104]\n\t" + "str lr, [sp, #108]\n\t" +#else "strd r12, lr, [sp, #104]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #112]\n\t" + "ldr lr, [sp, #116]\n\t" +#else "ldrd r12, lr, [sp, #112]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -1972,12 +3952,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #104]\n\t" + "ldr lr, [sp, #108]\n\t" +#else "ldrd r12, lr, [sp, #104]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #104]\n\t" + "str lr, [sp, #108]\n\t" +#else "strd r12, lr, [sp, #104]\n\t" +#endif /* Round 14 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -1992,36 +3987,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #8]\n\t" + "str lr, [%[sha512], #12]\n\t" +#else "strd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else "ldrd r6, r7, [%[sha512]]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #112]\n\t" + "ldr r7, [sp, #116]\n\t" +#else "ldrd r6, r7, [sp, #112]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #112]\n\t" + "ldr r5, [r3, #116]\n\t" +#else "ldrd r4, r5, [r3, #112]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #8]\n\t" + "str lr, [%[sha512], #12]\n\t" +#else "strd r12, lr, [%[sha512], #8]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #40]\n\t" + "str r7, [%[sha512], #44]\n\t" +#else "strd r6, r7, [%[sha512], #40]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -2036,28 +4091,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #8]\n\t" + "str lr, [%[sha512], #12]\n\t" +#else "strd r12, lr, [%[sha512], #8]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else "strd r4, r5, [%[sha512], #8]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[14] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #96]\n\t" + "ldr lr, [sp, #100]\n\t" +#else "ldrd r12, lr, [sp, #96]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -2073,14 +4163,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #112]\n\t" + "ldr lr, [sp, #116]\n\t" +#else "ldrd r12, lr, [sp, #112]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #56]\n\t" + "ldr r7, [sp, #60]\n\t" +#else "ldrd r6, r7, [sp, #56]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #112]\n\t" + "str lr, [sp, #116]\n\t" +#else "strd r12, lr, [sp, #112]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #120]\n\t" + "ldr lr, [sp, #124]\n\t" +#else "ldrd r12, lr, [sp, #120]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -2096,12 +4206,27 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #112]\n\t" + "ldr lr, [sp, #116]\n\t" +#else "ldrd r12, lr, [sp, #112]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #112]\n\t" + "str lr, [sp, #116]\n\t" +#else "strd r12, lr, [sp, #112]\n\t" +#endif /* Round 15 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -2116,36 +4241,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512]]\n\t" + "str lr, [%[sha512], #4]\n\t" +#else "strd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #120]\n\t" + "ldr r7, [sp, #124]\n\t" +#else "ldrd r6, r7, [sp, #120]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #120]\n\t" + "ldr r5, [r3, #124]\n\t" +#else "ldrd r4, r5, [r3, #120]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512]]\n\t" + "str lr, [%[sha512], #4]\n\t" +#else "strd r12, lr, [%[sha512]]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #32]\n\t" + "str r7, [%[sha512], #36]\n\t" +#else "strd r6, r7, [%[sha512], #32]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -2160,28 +4345,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512]]\n\t" + "str lr, [%[sha512], #4]\n\t" +#else "strd r12, lr, [%[sha512]]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else "ldrd r4, r5, [%[sha512]]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else "strd r4, r5, [%[sha512]]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Calc new W[15] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #104]\n\t" + "ldr lr, [sp, #108]\n\t" +#else "ldrd r12, lr, [sp, #104]\n\t" +#endif "lsrs r4, r12, #19\n\t" "lsrs r5, lr, #19\n\t" "orr r5, r5, r12, lsl #13\n\t" @@ -2197,14 +4417,34 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #26\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #120]\n\t" + "ldr lr, [sp, #124]\n\t" +#else "ldrd r12, lr, [sp, #120]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #64]\n\t" + "ldr r7, [sp, #68]\n\t" +#else "ldrd r6, r7, [sp, #64]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #120]\n\t" + "str lr, [sp, #124]\n\t" +#else "strd r12, lr, [sp, #120]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp]\n\t" + "ldr lr, [sp, #4]\n\t" +#else "ldrd r12, lr, [sp]\n\t" +#endif "lsrs r4, r12, #1\n\t" "lsrs r5, lr, #1\n\t" "orr r5, r5, r12, lsl #31\n\t" @@ -2220,15 +4460,30 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r6, r6, lr, lsl #25\n\t" "eor r5, r5, r7\n\t" "eor r4, r4, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [sp, #120]\n\t" + "ldr lr, [sp, #124]\n\t" +#else "ldrd r12, lr, [sp, #120]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #120]\n\t" + "str lr, [sp, #124]\n\t" +#else "strd r12, lr, [sp, #120]\n\t" +#endif "add r3, r3, #0x80\n\t" "subs r10, r10, #1\n\t" "bne L_SHA512_transform_len_start_%=\n\t" /* Round 0 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -2243,36 +4498,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #56]\n\t" + "str lr, [%[sha512], #60]\n\t" +#else "strd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp]\n\t" + "ldr r7, [sp, #4]\n\t" +#else "ldrd r6, r7, [sp]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3]\n\t" + "ldr r5, [r3, #4]\n\t" +#else "ldrd r4, r5, [r3]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #56]\n\t" + "str lr, [%[sha512], #60]\n\t" +#else "strd r12, lr, [%[sha512], #56]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #24]\n\t" + "str r7, [%[sha512], #28]\n\t" +#else "strd r6, r7, [%[sha512], #24]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -2287,28 +4602,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else "ldrd r6, r7, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #56]\n\t" + "str lr, [%[sha512], #60]\n\t" +#else "strd r12, lr, [%[sha512], #56]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else "strd r4, r5, [%[sha512], #56]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 1 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -2323,36 +4673,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #48]\n\t" + "str lr, [%[sha512], #52]\n\t" +#else "strd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #8]\n\t" + "ldr r7, [sp, #12]\n\t" +#else "ldrd r6, r7, [sp, #8]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #8]\n\t" + "ldr r5, [r3, #12]\n\t" +#else "ldrd r4, r5, [r3, #8]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #48]\n\t" + "str lr, [%[sha512], #52]\n\t" +#else "strd r12, lr, [%[sha512], #48]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #16]\n\t" + "str r7, [%[sha512], #20]\n\t" +#else "strd r6, r7, [%[sha512], #16]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -2367,28 +4777,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #48]\n\t" + "str lr, [%[sha512], #52]\n\t" +#else "strd r12, lr, [%[sha512], #48]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else "strd r4, r5, [%[sha512], #48]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 2 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -2403,36 +4848,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #40]\n\t" + "str lr, [%[sha512], #44]\n\t" +#else "strd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #16]\n\t" + "ldr r7, [sp, #20]\n\t" +#else "ldrd r6, r7, [sp, #16]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #16]\n\t" + "ldr r5, [r3, #20]\n\t" +#else "ldrd r4, r5, [r3, #16]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #40]\n\t" + "str lr, [%[sha512], #44]\n\t" +#else "strd r12, lr, [%[sha512], #40]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #8]\n\t" + "str r7, [%[sha512], #12]\n\t" +#else "strd r6, r7, [%[sha512], #8]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -2447,28 +4952,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #40]\n\t" + "str lr, [%[sha512], #44]\n\t" +#else "strd r12, lr, [%[sha512], #40]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else "strd r4, r5, [%[sha512], #40]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 3 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -2483,36 +5023,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #32]\n\t" + "str lr, [%[sha512], #36]\n\t" +#else "strd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #24]\n\t" + "ldr r7, [sp, #28]\n\t" +#else "ldrd r6, r7, [sp, #24]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #24]\n\t" + "ldr r5, [r3, #28]\n\t" +#else "ldrd r4, r5, [r3, #24]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else "ldrd r6, r7, [%[sha512]]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #32]\n\t" + "str lr, [%[sha512], #36]\n\t" +#else "strd r12, lr, [%[sha512], #32]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512]]\n\t" + "str r7, [%[sha512], #4]\n\t" +#else "strd r6, r7, [%[sha512]]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -2527,28 +5127,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #32]\n\t" + "str lr, [%[sha512], #36]\n\t" +#else "strd r12, lr, [%[sha512], #32]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else "strd r4, r5, [%[sha512], #32]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 4 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -2563,36 +5198,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #24]\n\t" + "str lr, [%[sha512], #28]\n\t" +#else "strd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #32]\n\t" + "ldr r7, [sp, #36]\n\t" +#else "ldrd r6, r7, [sp, #32]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #32]\n\t" + "ldr r5, [r3, #36]\n\t" +#else "ldrd r4, r5, [r3, #32]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #24]\n\t" + "str lr, [%[sha512], #28]\n\t" +#else "strd r12, lr, [%[sha512], #24]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #56]\n\t" + "str r7, [%[sha512], #60]\n\t" +#else "strd r6, r7, [%[sha512], #56]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -2607,28 +5302,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #24]\n\t" + "str lr, [%[sha512], #28]\n\t" +#else "strd r12, lr, [%[sha512], #24]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else "strd r4, r5, [%[sha512], #24]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 5 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -2643,36 +5373,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #16]\n\t" + "str lr, [%[sha512], #20]\n\t" +#else "strd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #40]\n\t" + "ldr r7, [sp, #44]\n\t" +#else "ldrd r6, r7, [sp, #40]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #40]\n\t" + "ldr r5, [r3, #44]\n\t" +#else "ldrd r4, r5, [r3, #40]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #16]\n\t" + "str lr, [%[sha512], #20]\n\t" +#else "strd r12, lr, [%[sha512], #16]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #48]\n\t" + "str r7, [%[sha512], #52]\n\t" +#else "strd r6, r7, [%[sha512], #48]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -2687,28 +5477,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #16]\n\t" + "str lr, [%[sha512], #20]\n\t" +#else "strd r12, lr, [%[sha512], #16]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else "strd r4, r5, [%[sha512], #16]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 6 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -2723,36 +5548,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #8]\n\t" + "str lr, [%[sha512], #12]\n\t" +#else "strd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else "ldrd r6, r7, [%[sha512]]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #48]\n\t" + "ldr r7, [sp, #52]\n\t" +#else "ldrd r6, r7, [sp, #48]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #48]\n\t" + "ldr r5, [r3, #52]\n\t" +#else "ldrd r4, r5, [r3, #48]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #8]\n\t" + "str lr, [%[sha512], #12]\n\t" +#else "strd r12, lr, [%[sha512], #8]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #40]\n\t" + "str r7, [%[sha512], #44]\n\t" +#else "strd r6, r7, [%[sha512], #40]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -2767,28 +5652,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #8]\n\t" + "str lr, [%[sha512], #12]\n\t" +#else "strd r12, lr, [%[sha512], #8]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else "strd r4, r5, [%[sha512], #8]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 7 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -2803,36 +5723,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512]]\n\t" + "str lr, [%[sha512], #4]\n\t" +#else "strd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #56]\n\t" + "ldr r7, [sp, #60]\n\t" +#else "ldrd r6, r7, [sp, #56]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #56]\n\t" + "ldr r5, [r3, #60]\n\t" +#else "ldrd r4, r5, [r3, #56]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512]]\n\t" + "str lr, [%[sha512], #4]\n\t" +#else "strd r12, lr, [%[sha512]]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #32]\n\t" + "str r7, [%[sha512], #36]\n\t" +#else "strd r6, r7, [%[sha512], #32]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -2847,28 +5827,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512]]\n\t" + "str lr, [%[sha512], #4]\n\t" +#else "strd r12, lr, [%[sha512]]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else "ldrd r4, r5, [%[sha512]]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else "strd r4, r5, [%[sha512]]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 8 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -2883,36 +5898,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #56]\n\t" + "str lr, [%[sha512], #60]\n\t" +#else "strd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #64]\n\t" + "ldr r7, [sp, #68]\n\t" +#else "ldrd r6, r7, [sp, #64]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #64]\n\t" + "ldr r5, [r3, #68]\n\t" +#else "ldrd r4, r5, [r3, #64]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #56]\n\t" + "str lr, [%[sha512], #60]\n\t" +#else "strd r12, lr, [%[sha512], #56]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #24]\n\t" + "str r7, [%[sha512], #28]\n\t" +#else "strd r6, r7, [%[sha512], #24]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -2927,28 +6002,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else "ldrd r6, r7, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #56]\n\t" + "str lr, [%[sha512], #60]\n\t" +#else "strd r12, lr, [%[sha512], #56]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else "strd r4, r5, [%[sha512], #56]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 9 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -2963,36 +6073,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #48]\n\t" + "str lr, [%[sha512], #52]\n\t" +#else "strd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #72]\n\t" + "ldr r7, [sp, #76]\n\t" +#else "ldrd r6, r7, [sp, #72]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #72]\n\t" + "ldr r5, [r3, #76]\n\t" +#else "ldrd r4, r5, [r3, #72]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #48]\n\t" + "str lr, [%[sha512], #52]\n\t" +#else "strd r12, lr, [%[sha512], #48]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #16]\n\t" + "str r7, [%[sha512], #20]\n\t" +#else "strd r6, r7, [%[sha512], #16]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -3007,28 +6177,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #48]\n\t" + "str lr, [%[sha512], #52]\n\t" +#else "strd r12, lr, [%[sha512], #48]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #48]\n\t" + "str r5, [%[sha512], #52]\n\t" +#else "strd r4, r5, [%[sha512], #48]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 10 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -3043,36 +6248,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #40]\n\t" + "str lr, [%[sha512], #44]\n\t" +#else "strd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #80]\n\t" + "ldr r7, [sp, #84]\n\t" +#else "ldrd r6, r7, [sp, #80]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #80]\n\t" + "ldr r5, [r3, #84]\n\t" +#else "ldrd r4, r5, [r3, #80]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #40]\n\t" + "str lr, [%[sha512], #44]\n\t" +#else "strd r12, lr, [%[sha512], #40]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #8]\n\t" + "str r7, [%[sha512], #12]\n\t" +#else "strd r6, r7, [%[sha512], #8]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -3087,28 +6352,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #40]\n\t" + "str lr, [%[sha512], #44]\n\t" +#else "strd r12, lr, [%[sha512], #40]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else "strd r4, r5, [%[sha512], #40]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 11 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -3123,36 +6423,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #32]\n\t" + "str lr, [%[sha512], #36]\n\t" +#else "strd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #88]\n\t" + "ldr r7, [sp, #92]\n\t" +#else "ldrd r6, r7, [sp, #88]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #88]\n\t" + "ldr r5, [r3, #92]\n\t" +#else "ldrd r4, r5, [r3, #88]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else "ldrd r6, r7, [%[sha512]]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #32]\n\t" + "str lr, [%[sha512], #36]\n\t" +#else "strd r12, lr, [%[sha512], #32]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512]]\n\t" + "str r7, [%[sha512], #4]\n\t" +#else "strd r6, r7, [%[sha512]]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -3167,28 +6527,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #32]\n\t" + "str lr, [%[sha512], #36]\n\t" +#else "strd r12, lr, [%[sha512], #32]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #32]\n\t" + "str r5, [%[sha512], #36]\n\t" +#else "strd r4, r5, [%[sha512], #32]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 12 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -3203,36 +6598,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #24]\n\t" + "str lr, [%[sha512], #28]\n\t" +#else "strd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #96]\n\t" + "ldr r7, [sp, #100]\n\t" +#else "ldrd r6, r7, [sp, #96]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #96]\n\t" + "ldr r5, [r3, #100]\n\t" +#else "ldrd r4, r5, [r3, #96]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #24]\n\t" + "str lr, [%[sha512], #28]\n\t" +#else "strd r12, lr, [%[sha512], #24]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #56]\n\t" + "str r7, [%[sha512], #60]\n\t" +#else "strd r6, r7, [%[sha512], #56]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -3247,28 +6702,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #24]\n\t" + "str lr, [%[sha512], #28]\n\t" +#else "strd r12, lr, [%[sha512], #24]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else "strd r4, r5, [%[sha512], #24]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 13 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -3283,36 +6773,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #16]\n\t" + "str lr, [%[sha512], #20]\n\t" +#else "strd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #56]\n\t" + "ldr lr, [%[sha512], #60]\n\t" +#else "ldrd r12, lr, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else "ldrd r4, r5, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #104]\n\t" + "ldr r7, [sp, #108]\n\t" +#else "ldrd r6, r7, [sp, #104]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #104]\n\t" + "ldr r5, [r3, #108]\n\t" +#else "ldrd r4, r5, [r3, #104]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #48]\n\t" + "ldr r7, [%[sha512], #52]\n\t" +#else "ldrd r6, r7, [%[sha512], #48]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #16]\n\t" + "str lr, [%[sha512], #20]\n\t" +#else "strd r12, lr, [%[sha512], #16]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #24]\n\t" + "ldr lr, [%[sha512], #28]\n\t" +#else "ldrd r12, lr, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #48]\n\t" + "str r7, [%[sha512], #52]\n\t" +#else "strd r6, r7, [%[sha512], #48]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -3327,28 +6877,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #24]\n\t" + "ldr r7, [%[sha512], #28]\n\t" +#else "ldrd r6, r7, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #32]\n\t" + "ldr r5, [%[sha512], #36]\n\t" +#else "ldrd r4, r5, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #16]\n\t" + "str lr, [%[sha512], #20]\n\t" +#else "strd r12, lr, [%[sha512], #16]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #16]\n\t" + "str r5, [%[sha512], #20]\n\t" +#else "strd r4, r5, [%[sha512], #16]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 14 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -3363,36 +6948,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #8]\n\t" + "str lr, [%[sha512], #12]\n\t" +#else "strd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512]]\n\t" + "ldr r7, [%[sha512], #4]\n\t" +#else "ldrd r6, r7, [%[sha512]]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #112]\n\t" + "ldr r7, [sp, #116]\n\t" +#else "ldrd r6, r7, [sp, #112]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #112]\n\t" + "ldr r5, [r3, #116]\n\t" +#else "ldrd r4, r5, [r3, #112]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #40]\n\t" + "ldr r7, [%[sha512], #44]\n\t" +#else "ldrd r6, r7, [%[sha512], #40]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #8]\n\t" + "str lr, [%[sha512], #12]\n\t" +#else "strd r12, lr, [%[sha512], #8]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #40]\n\t" + "str r7, [%[sha512], #44]\n\t" +#else "strd r6, r7, [%[sha512], #40]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -3407,28 +7052,63 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #16]\n\t" + "ldr r7, [%[sha512], #20]\n\t" +#else "ldrd r6, r7, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #8]\n\t" + "str lr, [%[sha512], #12]\n\t" +#else "strd r12, lr, [%[sha512], #8]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else "strd r4, r5, [%[sha512], #8]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Round 15 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif "lsrs r4, r12, #14\n\t" "lsrs r5, lr, #14\n\t" "orr r5, r5, r12, lsl #18\n\t" @@ -3443,36 +7123,96 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #23\n\t" "orr r7, r7, r12, lsr #9\n\t" "orr r6, r6, lr, lsr #9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512]]\n\t" + "str lr, [%[sha512], #4]\n\t" +#else "strd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #40]\n\t" + "ldr lr, [%[sha512], #44]\n\t" +#else "ldrd r12, lr, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #48]\n\t" + "ldr r5, [%[sha512], #52]\n\t" +#else "ldrd r4, r5, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #56]\n\t" + "ldr r7, [%[sha512], #60]\n\t" +#else "ldrd r6, r7, [%[sha512], #56]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "and r4, r4, r12\n\t" "and r5, r5, lr\n\t" "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #120]\n\t" + "ldr r7, [sp, #124]\n\t" +#else "ldrd r6, r7, [sp, #120]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [r3, #120]\n\t" + "ldr r5, [r3, #124]\n\t" +#else "ldrd r4, r5, [r3, #120]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #32]\n\t" + "ldr r7, [%[sha512], #36]\n\t" +#else "ldrd r6, r7, [%[sha512], #32]\n\t" +#endif "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512]]\n\t" + "str lr, [%[sha512], #4]\n\t" +#else "strd r12, lr, [%[sha512]]\n\t" +#endif "adds r6, r6, r12\n\t" "adc r7, r7, lr\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #8]\n\t" + "ldr lr, [%[sha512], #12]\n\t" +#else "ldrd r12, lr, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r6, [%[sha512], #32]\n\t" + "str r7, [%[sha512], #36]\n\t" +#else "strd r6, r7, [%[sha512], #32]\n\t" +#endif "lsrs r4, r12, #28\n\t" "lsrs r5, lr, #28\n\t" "orr r5, r5, r12, lsl #4\n\t" @@ -3487,75 +7227,265 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "lsls r7, lr, #25\n\t" "orr r7, r7, r12, lsr #7\n\t" "orr r6, r6, lr, lsr #7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif "eor r4, r4, r6\n\t" "eor r5, r5, r7\n\t" "adds r12, r12, r4\n\t" "adc lr, lr, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[sha512], #8]\n\t" + "ldr r7, [%[sha512], #12]\n\t" +#else "ldrd r6, r7, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #16]\n\t" + "ldr r5, [%[sha512], #20]\n\t" +#else "ldrd r4, r5, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512]]\n\t" + "str lr, [%[sha512], #4]\n\t" +#else "strd r12, lr, [%[sha512]]\n\t" +#endif "eor r6, r6, r4\n\t" "eor r7, r7, r5\n\t" "and r8, r8, r6\n\t" "and r9, r9, r7\n\t" "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512]]\n\t" + "ldr r5, [%[sha512], #4]\n\t" +#else "ldrd r4, r5, [%[sha512]]\n\t" +#endif "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512]]\n\t" + "str r5, [%[sha512], #4]\n\t" +#else "strd r4, r5, [%[sha512]]\n\t" +#endif "mov r8, r6\n\t" "mov r9, r7\n\t" /* Add in digest from start */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512]]\n\t" + "ldr lr, [%[sha512], #4]\n\t" +#else "ldrd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #8]\n\t" + "ldr r5, [%[sha512], #12]\n\t" +#else "ldrd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #128]\n\t" + "ldr r7, [sp, #132]\n\t" +#else "ldrd r6, r7, [sp, #128]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [sp, #136]\n\t" + "ldr r9, [sp, #140]\n\t" +#else "ldrd r8, r9, [sp, #136]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512]]\n\t" + "str lr, [%[sha512], #4]\n\t" +#else "strd r12, lr, [%[sha512]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #8]\n\t" + "str r5, [%[sha512], #12]\n\t" +#else "strd r4, r5, [%[sha512], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #128]\n\t" + "str lr, [sp, #132]\n\t" +#else "strd r12, lr, [sp, #128]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #136]\n\t" + "str r5, [sp, #140]\n\t" +#else "strd r4, r5, [sp, #136]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #16]\n\t" + "ldr lr, [%[sha512], #20]\n\t" +#else "ldrd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #24]\n\t" + "ldr r5, [%[sha512], #28]\n\t" +#else "ldrd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #144]\n\t" + "ldr r7, [sp, #148]\n\t" +#else "ldrd r6, r7, [sp, #144]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [sp, #152]\n\t" + "ldr r9, [sp, #156]\n\t" +#else "ldrd r8, r9, [sp, #152]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #16]\n\t" + "str lr, [%[sha512], #20]\n\t" +#else "strd r12, lr, [%[sha512], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #24]\n\t" + "str r5, [%[sha512], #28]\n\t" +#else "strd r4, r5, [%[sha512], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #144]\n\t" + "str lr, [sp, #148]\n\t" +#else "strd r12, lr, [sp, #144]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #152]\n\t" + "str r5, [sp, #156]\n\t" +#else "strd r4, r5, [sp, #152]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #32]\n\t" + "ldr lr, [%[sha512], #36]\n\t" +#else "ldrd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #40]\n\t" + "ldr r5, [%[sha512], #44]\n\t" +#else "ldrd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #160]\n\t" + "ldr r7, [sp, #164]\n\t" +#else "ldrd r6, r7, [sp, #160]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [sp, #168]\n\t" + "ldr r9, [sp, #172]\n\t" +#else "ldrd r8, r9, [sp, #168]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #32]\n\t" + "str lr, [%[sha512], #36]\n\t" +#else "strd r12, lr, [%[sha512], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #40]\n\t" + "str r5, [%[sha512], #44]\n\t" +#else "strd r4, r5, [%[sha512], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #160]\n\t" + "str lr, [sp, #164]\n\t" +#else "strd r12, lr, [sp, #160]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #168]\n\t" + "str r5, [sp, #172]\n\t" +#else "strd r4, r5, [sp, #168]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r12, [%[sha512], #48]\n\t" + "ldr lr, [%[sha512], #52]\n\t" +#else "ldrd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[sha512], #56]\n\t" + "ldr r5, [%[sha512], #60]\n\t" +#else "ldrd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #176]\n\t" + "ldr r7, [sp, #180]\n\t" +#else "ldrd r6, r7, [sp, #176]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [sp, #184]\n\t" + "ldr r9, [sp, #188]\n\t" +#else "ldrd r8, r9, [sp, #184]\n\t" +#endif "adds r12, r12, r6\n\t" "adc lr, lr, r7\n\t" "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [%[sha512], #48]\n\t" + "str lr, [%[sha512], #52]\n\t" +#else "strd r12, lr, [%[sha512], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[sha512], #56]\n\t" + "str r5, [%[sha512], #60]\n\t" +#else "strd r4, r5, [%[sha512], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r12, [sp, #176]\n\t" + "str lr, [sp, #180]\n\t" +#else "strd r12, lr, [sp, #176]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [sp, #184]\n\t" + "str r5, [sp, #188]\n\t" +#else "strd r4, r5, [sp, #184]\n\t" +#endif "subs %[len], %[len], #0x80\n\t" "sub r3, r3, #0x200\n\t" "add %[data], %[data], #0x80\n\t" diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index 1ad73675d..a37f13383 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -231,8 +231,12 @@ static void sp_2048_to_bin_64(sp_digit* r, byte* a) * a A single precision integer. * b A single precision integer. */ -static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #32\n\t" "mov r10, #0\n\t" @@ -2680,8 +2684,12 @@ static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" @@ -2711,8 +2719,11 @@ static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_in_place_16(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -2756,8 +2767,12 @@ static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" @@ -2866,8 +2881,11 @@ SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -2939,8 +2957,12 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" @@ -3081,8 +3103,11 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -3210,8 +3235,12 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" @@ -3408,8 +3437,11 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) +static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #32\n\t" /* A[0] * A[0] */ @@ -4906,8 +4938,12 @@ static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -4973,8 +5009,12 @@ SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -5054,8 +5094,12 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -5165,8 +5209,12 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #0x100\n\t" @@ -5199,8 +5247,11 @@ static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" "mov r12, #0\n\t" @@ -5234,8 +5285,12 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_2048_mul_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #0x200\n\t" "mov r5, #0\n\t" @@ -5320,8 +5375,11 @@ static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. */ -static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) +static void sp_2048_sqr_64(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #0x200\n\t" "mov r12, #0\n\t" @@ -5478,8 +5536,12 @@ static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #0x80\n\t" @@ -5512,8 +5574,11 @@ static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" "mov r12, #0\n\t" @@ -5547,8 +5612,12 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_2048_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #0x100\n\t" "mov r5, #0\n\t" @@ -5633,8 +5702,11 @@ static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. */ -static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) +static void sp_2048_sqr_32(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #0x100\n\t" "mov r12, #0\n\t" @@ -5795,8 +5867,12 @@ static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho) * a A single precision integer. * b A single precision digit. */ -static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -5890,8 +5966,12 @@ static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) * a A single precision integer. * b A single precision digit. */ -static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -8468,8 +8548,13 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r6, #0\n\t" "mov r12, #0\n\t" @@ -8503,8 +8588,13 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" @@ -8634,8 +8724,12 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* m asm ("r1") = m_p; + register sp_digit mp asm ("r2") = mp_p; + __asm__ __volatile__ ( #if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r11, [%[m]]\n\t" @@ -9833,8 +9927,12 @@ SP_NOINLINE static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision digit. */ -static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -9928,8 +10026,12 @@ static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) * a A single precision integer. * b A single precision digit. */ -static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -11212,8 +11314,12 @@ static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr r6, %[div], #16\n\t" "add lr, r6, #1\n\t" @@ -11267,8 +11373,12 @@ static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr lr, %[div], #1\n\t" "add lr, lr, #1\n\t" @@ -11399,8 +11509,11 @@ static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_int32 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_2048_cmp_32(const sp_digit* a_p, const sp_digit* b_p) { + register const sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r2, #-1\n\t" "mov r6, #1\n\t" @@ -12192,8 +12305,13 @@ static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r6, #0\n\t" "mov r12, #0\n\t" @@ -12227,8 +12345,13 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_dig * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" @@ -12470,8 +12593,12 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_dig * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* m asm ("r1") = m_p; + register sp_digit mp asm ("r2") = mp_p; + __asm__ __volatile__ ( #if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r11, [%[m]]\n\t" @@ -14789,8 +14916,12 @@ SP_NOINLINE static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x100\n\t" @@ -14822,8 +14953,12 @@ static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -14956,8 +15091,12 @@ static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr r6, %[div], #16\n\t" "add lr, r6, #1\n\t" @@ -15011,8 +15150,12 @@ static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr lr, %[div], #1\n\t" "add lr, lr, #1\n\t" @@ -15246,8 +15389,11 @@ static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_2048_cmp_64(const sp_digit* a_p, const sp_digit* b_p) { + register const sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r2, #-1\n\t" "mov r6, #1\n\t" @@ -16514,8 +16660,13 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov lr, #0\n\t" "mov r6, #0\n\t" @@ -16549,8 +16700,13 @@ static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_dig * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r8, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" @@ -16988,8 +17144,12 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_2048 -static void sp_2048_lshift_64(sp_digit* r, const sp_digit* a, byte n) +static void sp_2048_lshift_64(sp_digit* r_p, const sp_digit* a_p, byte n_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register byte n asm ("r2") = n_p; + __asm__ __volatile__ ( "rsb r12, %[n], #31\n\t" "ldr r5, [%[a], #252]\n\t" @@ -17798,8 +17958,12 @@ static void sp_3072_to_bin_96(sp_digit* r, byte* a) * a A single precision integer. * b A single precision integer. */ -static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_3072_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #48\n\t" "mov r10, #0\n\t" @@ -23293,8 +23457,12 @@ static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" @@ -23331,8 +23499,11 @@ static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_in_place_24(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -23390,8 +23561,12 @@ static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_add_24(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" @@ -23518,8 +23693,11 @@ SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -23619,8 +23797,12 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" @@ -23789,8 +23971,11 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -23974,8 +24159,12 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" @@ -24228,8 +24417,11 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) +static void sp_3072_sqr_12(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #48\n\t" /* A[0] * A[0] */ @@ -27288,8 +27480,12 @@ static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -27362,8 +27558,12 @@ SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_sub_24(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_24(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -27457,8 +27657,12 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -27596,8 +27800,12 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #0x180\n\t" @@ -27630,8 +27838,11 @@ static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" "mov r12, #0\n\t" @@ -27665,8 +27876,12 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_3072_mul_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #0x300\n\t" "mov r5, #0\n\t" @@ -27751,8 +27966,11 @@ static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. */ -static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) +static void sp_3072_sqr_96(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #0x300\n\t" "mov r12, #0\n\t" @@ -27909,8 +28127,12 @@ static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #0xc0\n\t" @@ -27943,8 +28165,11 @@ static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" "mov r12, #0\n\t" @@ -27978,8 +28203,12 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_3072_mul_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #0x180\n\t" "mov r5, #0\n\t" @@ -28064,8 +28293,11 @@ static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. */ -static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) +static void sp_3072_sqr_48(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #0x180\n\t" "mov r12, #0\n\t" @@ -28226,8 +28458,12 @@ static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho) * a A single precision integer. * b A single precision digit. */ -static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -28321,8 +28557,12 @@ static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) * a A single precision integer. * b A single precision digit. */ -static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -32179,8 +32419,13 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r6, #0\n\t" "mov r12, #0\n\t" @@ -32214,8 +32459,13 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_dig * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" @@ -32401,8 +32651,12 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_dig * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* m asm ("r1") = m_p; + register sp_digit mp asm ("r2") = mp_p; + __asm__ __volatile__ ( #if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r11, [%[m]]\n\t" @@ -34160,8 +34414,12 @@ SP_NOINLINE static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision digit. */ -static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -34255,8 +34513,12 @@ static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) * a A single precision integer. * b A single precision digit. */ -static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -36179,8 +36441,12 @@ static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr r6, %[div], #16\n\t" "add lr, r6, #1\n\t" @@ -36234,8 +36500,12 @@ static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr lr, %[div], #1\n\t" "add lr, lr, #1\n\t" @@ -36366,8 +36636,11 @@ static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_int32 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_3072_cmp_48(const sp_digit* a_p, const sp_digit* b_p) { + register const sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r2, #-1\n\t" "mov r6, #1\n\t" @@ -37335,8 +37608,13 @@ static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r6, #0\n\t" "mov r12, #0\n\t" @@ -37370,8 +37648,13 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_dig * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" @@ -37725,8 +38008,12 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_dig * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* m asm ("r1") = m_p; + register sp_digit mp asm ("r2") = mp_p; + __asm__ __volatile__ ( #if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r11, [%[m]]\n\t" @@ -41164,8 +41451,12 @@ SP_NOINLINE static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x180\n\t" @@ -41197,8 +41488,12 @@ static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -41387,8 +41682,12 @@ static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr r6, %[div], #16\n\t" "add lr, r6, #1\n\t" @@ -41442,8 +41741,12 @@ static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr lr, %[div], #1\n\t" "add lr, lr, #1\n\t" @@ -41677,8 +41980,11 @@ static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_int32 sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_3072_cmp_96(const sp_digit* a_p, const sp_digit* b_p) { + register const sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r2, #-1\n\t" "mov r6, #1\n\t" @@ -43303,8 +43609,13 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov lr, #0\n\t" "mov r6, #0\n\t" @@ -43338,8 +43649,13 @@ static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_dig * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r8, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" @@ -43833,8 +44149,12 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_3072 -static void sp_3072_lshift_96(sp_digit* r, const sp_digit* a, byte n) +static void sp_3072_lshift_96(sp_digit* r_p, const sp_digit* a_p, byte n_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register byte n asm ("r2") = n_p; + __asm__ __volatile__ ( "rsb r12, %[n], #31\n\t" "ldr r5, [%[a], #380]\n\t" @@ -44834,8 +45154,11 @@ static void sp_4096_to_bin_128(sp_digit* r, byte* a) * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) +static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -45075,8 +45398,12 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" @@ -45394,8 +45721,12 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #0x200\n\t" @@ -45428,8 +45759,11 @@ static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) +static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" "mov r12, #0\n\t" @@ -45463,8 +45797,12 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_4096_mul_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #0x400\n\t" "mov r5, #0\n\t" @@ -45549,8 +45887,11 @@ static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. */ -static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +static void sp_4096_sqr_128(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #0x400\n\t" "mov r12, #0\n\t" @@ -45709,8 +46050,12 @@ static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) * a A single precision integer. * b A single precision digit. */ -static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -45804,8 +46149,12 @@ static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) * a A single precision integer. * b A single precision digit. */ -static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -50943,8 +51292,13 @@ static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r6, #0\n\t" "mov r12, #0\n\t" @@ -50978,8 +51332,13 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_di * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" @@ -51445,8 +51804,12 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_di * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* m asm ("r1") = m_p; + register sp_digit mp asm ("r2") = mp_p; + __asm__ __volatile__ ( #if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r11, [%[m]]\n\t" @@ -56004,8 +56367,12 @@ SP_NOINLINE static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x200\n\t" @@ -56037,8 +56404,12 @@ static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -56283,8 +56654,12 @@ static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr r6, %[div], #16\n\t" "add lr, r6, #1\n\t" @@ -56338,8 +56713,12 @@ static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr lr, %[div], #1\n\t" "add lr, lr, #1\n\t" @@ -56573,8 +56952,11 @@ static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_int32 sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_4096_cmp_128(const sp_digit* a_p, const sp_digit* b_p) { + register const sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r2, #-1\n\t" "mov r6, #1\n\t" @@ -58551,8 +58933,13 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov lr, #0\n\t" "mov r6, #0\n\t" @@ -58586,8 +58973,13 @@ static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_dig * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r8, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" @@ -59137,8 +59529,12 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_4096 -static void sp_4096_lshift_128(sp_digit* r, const sp_digit* a, byte n) +static void sp_4096_lshift_128(sp_digit* r_p, const sp_digit* a_p, byte n_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register byte n asm ("r2") = n_p; + __asm__ __volatile__ ( "rsb r12, %[n], #31\n\t" "ldr r5, [%[a], #508]\n\t" @@ -60204,8 +60600,12 @@ static const sp_digit p256_b[8] = { * a A single precision integer. * b A single precision integer. */ -static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #0x40\n\t" "mov r5, #0\n\t" @@ -60292,8 +60692,12 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #32\n\t" "mov r10, #0\n\t" @@ -62742,8 +63146,11 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. */ -static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #0x40\n\t" "mov r12, #0\n\t" @@ -62880,8 +63287,11 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) * r A single precision integer. * a A single precision integer. */ -static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #32\n\t" /* A[0] * A[0] */ @@ -64380,8 +64790,12 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #32\n\t" @@ -64414,8 +64828,12 @@ static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" @@ -64448,8 +64866,12 @@ static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #32\n\t" @@ -64481,8 +64903,12 @@ static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -64513,8 +64939,11 @@ static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) * a The number to convert. * m The modulus (prime). */ -static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #24\n\t" "ldm %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" @@ -64711,7 +65140,7 @@ static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10" ); - (void)m; + (void)m_p; return (uint32_t)(size_t)r; } @@ -64919,8 +65348,12 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) * m Modulus (prime). * mp Montgomery mulitplier. */ -static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #0x44\n\t" "mov r5, #0\n\t" @@ -67489,9 +67922,9 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r12" ); - (void)mp; - (void)m; - (void)mp; + (void)mp_p; + (void)m_p; + (void)mp_p; } /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) @@ -67501,8 +67934,11 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const * m Modulus (prime). * mp Montgomery mulitplier. */ -static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #0x44\n\t" "mov r5, #0\n\t" @@ -68928,8 +69364,8 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r12", "r8", "r9", "r10", "lr" ); - (void)m; - (void)mp; + (void)m_p; + (void)mp_p; } #if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY) @@ -69035,8 +69471,11 @@ static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_256_cmp_8(const sp_digit* a_p, const sp_digit* b_p) { + register const sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r2, #-1\n\t" "mov r6, #1\n\t" @@ -69174,8 +69613,13 @@ static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r6, #0\n\t" "mov r12, #0\n\t" @@ -69209,8 +69653,13 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" @@ -69259,8 +69708,12 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* m asm ("r1") = m_p; + register sp_digit mp asm ("r2") = mp_p; + __asm__ __volatile__ ( #if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r11, [%[m]]\n\t" @@ -69588,8 +70041,10 @@ static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_ * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { + register sp_digit* a asm ("r0") = a_p; + __asm__ __volatile__ ( "mov r1, #0\n\t" /* # i = 0 */ @@ -69681,8 +70136,8 @@ static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_ : : "memory", "r1", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); - (void)m; - (void)mp; + (void)m_p; + (void)mp_p; } /* Reduce the number back to 256 bits using Montgomery reduction. @@ -69691,8 +70146,12 @@ static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_ * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* m asm ("r1") = m_p; + register sp_digit mp asm ("r2") = mp_p; + __asm__ __volatile__ ( #if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r11, [%[m]]\n\t" @@ -70062,8 +70521,12 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r8, r9, r10, r11}\n\t" @@ -70092,10 +70555,11 @@ static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, "sbcs r10, r10, r12\n\t" "sbc r11, r11, r3\n\t" "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); + (void)m_p; } /* Double a Montgomery form number (r = a + a % m). @@ -70104,8 +70568,11 @@ static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, * a Number to double in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "mov r3, #0\n\t" "ldm %[a], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" @@ -70129,10 +70596,11 @@ static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) "sbcs r10, r10, r3\n\t" "sbc r11, r11, r2\n\t" "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); + (void)m_p; } /* Triple a Montgomery form number (r = a + a + a % m). @@ -70141,8 +70609,11 @@ static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) * a Number to triple in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "mov r3, #0\n\t" "ldm %[a], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" @@ -70204,10 +70675,11 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) "sbcs r10, r10, r3\n\t" "sbc r11, r11, r2\n\t" "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); + (void)m_p; } /* Subtract two Montgomery form numbers (r = a - b % m). @@ -70217,8 +70689,12 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r8, r9, r10, r11}\n\t" @@ -70247,10 +70723,11 @@ static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, "adcs r10, r10, r12\n\t" "adc r11, r11, r3\n\t" "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); + (void)m_p; } #define sp_256_mont_sub_lower_8 sp_256_mont_sub_8 @@ -70260,8 +70737,12 @@ static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, * a Number to divide. * m Modulus (prime). */ -static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_256_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* m asm ("r2") = m_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" "ldm %[a], {r4, r5, r6, r7}\n\t" @@ -73809,8 +74290,10 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, * * a A single precision integer. */ -static void sp_256_add_one_8(sp_digit* a) +static void sp_256_add_one_8(sp_digit* a_p) { + register sp_digit* a asm ("r0") = a_p; + __asm__ __volatile__ ( "ldm %[a], {r1, r2, r3, r4}\n\t" "adds r1, r1, #1\n\t" @@ -74205,8 +74688,11 @@ int sp_ecc_secret_gen_256_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) +static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" "mov r12, #0\n\t" @@ -74238,8 +74724,11 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) +static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -74271,8 +74760,12 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision digit. */ -static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -74366,8 +74859,12 @@ static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) * a A single precision integer. * b A single precision digit. */ -static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -74690,8 +75187,12 @@ static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr r6, %[div], #16\n\t" "add lr, r6, #1\n\t" @@ -74745,8 +75246,12 @@ static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr lr, %[div], #1\n\t" "add lr, lr, #1\n\t" @@ -75507,8 +76012,11 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL -static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) +static void sp_256_rshift1_8(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "mov r11, #0\n\t" "mov r12, #0\n\t" @@ -75589,8 +76097,12 @@ static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus. */ -static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_256_div2_mod_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* m asm ("r2") = m_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldr r4, [%[a]], #4\n\t" @@ -75701,8 +76213,10 @@ static const unsigned char L_sp_256_num_bits_8_table[] = { 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, }; -static int sp_256_num_bits_8(const sp_digit* a) +static int sp_256_num_bits_8(const sp_digit* a_p) { + register const sp_digit* a asm ("r0") = a_p; + __asm__ __volatile__ ( "mov lr, %[L_sp_256_num_bits_8_table]\n\t" "ldr r1, [%[a], #28]\n\t" @@ -76021,8 +76535,10 @@ static int sp_256_num_bits_8(const sp_digit* a) } #else -static int sp_256_num_bits_8(const sp_digit* a) +static int sp_256_num_bits_8(const sp_digit* a_p) { + register const sp_digit* a asm ("r0") = a_p; + __asm__ __volatile__ ( "ldr r1, [%[a], #28]\n\t" "cmp r1, #0\n\t" @@ -77198,8 +77714,12 @@ static const sp_digit p384_b[12] = { * a A single precision integer. * b A single precision integer. */ -static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #0x60\n\t" "mov r5, #0\n\t" @@ -77286,8 +77806,12 @@ static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #48\n\t" "mov r10, #0\n\t" @@ -82782,8 +83306,11 @@ static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. */ -static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #0x60\n\t" "mov r12, #0\n\t" @@ -82920,8 +83447,11 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) * r A single precision integer. * a A single precision integer. */ -static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #48\n\t" /* A[0] * A[0] */ @@ -85982,8 +86512,12 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #48\n\t" @@ -86016,8 +86550,12 @@ static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" @@ -86057,8 +86595,12 @@ static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #48\n\t" @@ -86090,8 +86632,12 @@ static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -86429,8 +86975,13 @@ static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r6, #0\n\t" "mov r12, #0\n\t" @@ -86464,8 +87015,13 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digi * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" @@ -86527,8 +87083,12 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digi * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* m asm ("r1") = m_p; + register sp_digit mp asm ("r2") = mp_p; + __asm__ __volatile__ ( #if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r11, [%[m]]\n\t" @@ -87138,8 +87698,11 @@ static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_int32 sp_384_cmp_12(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_384_cmp_12(const sp_digit* a_p, const sp_digit* b_p) { + register const sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r2, #-1\n\t" "mov r6, #1\n\t" @@ -87360,8 +87923,13 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +static void sp_384_mont_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register const sp_digit* m asm ("r3") = m_p; + sp_digit o; o = sp_384_add_12(r, a, b); @@ -87374,8 +87942,12 @@ static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b * a Number to double in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_384_mont_dbl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* m asm ("r2") = m_p; + sp_digit o; o = sp_384_add_12(r, a, a); @@ -87388,8 +87960,12 @@ static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m * a Number to triple in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_384_mont_tpl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* m asm ("r2") = m_p; + sp_digit o; o = sp_384_add_12(r, a, a); @@ -87407,8 +87983,13 @@ static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov lr, #0\n\t" "mov r6, #0\n\t" @@ -87442,8 +88023,13 @@ static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digi * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r8, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" @@ -87504,8 +88090,13 @@ static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digi * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +static void sp_384_mont_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register const sp_digit* m asm ("r3") = m_p; + sp_digit o; o = sp_384_sub_12(r, a, b); @@ -87516,8 +88107,11 @@ static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b #ifdef WOLFSSL_SP_SMALL #else #endif /* WOLFSSL_SP_SMALL */ -static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a) +static void sp_384_rshift1_12(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "ldm %[a], {r2, r3}\n\t" "lsr r2, r2, #1\n\t" @@ -91126,8 +91720,10 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, * * a A single precision integer. */ -static void sp_384_add_one_12(sp_digit* a) +static void sp_384_add_one_12(sp_digit* a_p) { + register sp_digit* a asm ("r0") = a_p; + __asm__ __volatile__ ( "ldm %[a], {r1, r2, r3, r4}\n\t" "adds r1, r1, #1\n\t" @@ -91528,8 +92124,11 @@ int sp_ecc_secret_gen_384_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) +static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" "mov r12, #0\n\t" @@ -91561,8 +92160,11 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) +static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -91601,8 +92203,12 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision digit. */ -static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -91696,8 +92302,12 @@ static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) * a A single precision integer. * b A single precision digit. */ -static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -92180,8 +92790,12 @@ static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr r6, %[div], #16\n\t" "add lr, r6, #1\n\t" @@ -92235,8 +92849,12 @@ static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr lr, %[div], #1\n\t" "add lr, lr, #1\n\t" @@ -92978,8 +93596,12 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W * a Number to divide. * m Modulus. */ -static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_384_div2_mod_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* m asm ("r2") = m_p; + __asm__ __volatile__ ( "ldr r4, [%[a]], #4\n\t" "ands r3, r4, #1\n\t" @@ -93113,8 +93735,10 @@ static const unsigned char L_sp_384_num_bits_12_table[] = { 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, }; -static int sp_384_num_bits_12(const sp_digit* a) +static int sp_384_num_bits_12(const sp_digit* a_p) { + register const sp_digit* a asm ("r0") = a_p; + __asm__ __volatile__ ( "mov lr, %[L_sp_384_num_bits_12_table]\n\t" "ldr r1, [%[a], #44]\n\t" @@ -93685,8 +94309,10 @@ static int sp_384_num_bits_12(const sp_digit* a) } #else -static int sp_384_num_bits_12(const sp_digit* a) +static int sp_384_num_bits_12(const sp_digit* a_p) { + register const sp_digit* a asm ("r0") = a_p; + __asm__ __volatile__ ( "ldr r1, [%[a], #44]\n\t" "cmp r1, #0\n\t" @@ -94968,8 +95594,12 @@ static const sp_digit p521_b[17] = { * a A single precision integer. * b A single precision integer. */ -static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #0x88\n\t" "mov r5, #0\n\t" @@ -95059,8 +95689,12 @@ static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #0x44\n\t" "mov r10, #0\n\t" @@ -106074,8 +106708,11 @@ static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. */ -static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) +static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #0x88\n\t" "mov r12, #0\n\t" @@ -106215,8 +106852,11 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) * r A single precision integer. * a A single precision integer. */ -static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) +static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #0x44\n\t" /* A[0] * A[0] */ @@ -112086,8 +112726,12 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #0x40\n\t" @@ -112126,8 +112770,12 @@ static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" @@ -112178,8 +112826,12 @@ static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x40\n\t" @@ -112216,8 +112868,12 @@ static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -112482,8 +113138,13 @@ static int sp_521_point_to_ecc_point_17(const sp_point_521* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r6, #0\n\t" "mov r12, #0\n\t" @@ -112517,8 +113178,13 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digi * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" @@ -112597,8 +113263,10 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digi * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { + register sp_digit* a asm ("r0") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #0x44\n\t" "mov r12, sp\n\t" @@ -112713,8 +113381,8 @@ static SP_NOINLINE void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, sp : : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); - (void)m; - (void)mp; + (void)m_p; + (void)mp_p; } /* Reduce the number back to 521 bits using Montgomery reduction. @@ -112723,8 +113391,12 @@ static SP_NOINLINE void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, sp * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* m asm ("r1") = m_p; + register sp_digit mp asm ("r2") = mp_p; + __asm__ __volatile__ ( #if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r11, [%[m]]\n\t" @@ -113589,8 +114261,11 @@ static void sp_521_mont_inv_17(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_int32 sp_521_cmp_17(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_521_cmp_17(const sp_digit* a_p, const sp_digit* b_p) { + register const sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r2, #-1\n\t" "mov r6, #1\n\t" @@ -113866,8 +114541,12 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_521_mont_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r3, #0\n\t" "ldm %[a]!, {r8, r9, r10, r11}\n\t" @@ -113935,10 +114614,11 @@ static void sp_521_mont_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b "ldm %[r], {r4}\n\t" "adcs r4, r4, #0\n\t" "stm %[r]!, {r4}\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); + (void)m_p; } /* Double a Montgomery form number (r = a + a % m). @@ -113947,8 +114627,11 @@ static void sp_521_mont_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b * a Number to double in Montgomery form. * m Modulus (prime). */ -static void sp_521_mont_dbl_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_521_mont_dbl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "mov r2, #0\n\t" "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" @@ -114007,10 +114690,11 @@ static void sp_521_mont_dbl_17(sp_digit* r, const sp_digit* a, const sp_digit* m "ldm %[r], {r4}\n\t" "adcs r4, r4, #0\n\t" "stm %[r]!, {r4}\n\t" - : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); + (void)m_p; } /* Triple a Montgomery form number (r = a + a + a % m). @@ -114019,8 +114703,11 @@ static void sp_521_mont_dbl_17(sp_digit* r, const sp_digit* a, const sp_digit* m * a Number to triple in Montgomery form. * m Modulus (prime). */ -static void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "mov r2, #0\n\t" "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" @@ -114099,10 +114786,11 @@ static void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, const sp_digit* m "ldm %[r], {r4}\n\t" "adcs r4, r4, #0\n\t" "stm %[r]!, {r4}\n\t" - : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); + (void)m_p; } /* Subtract two Montgomery form numbers (r = a - b % m). @@ -114112,8 +114800,12 @@ static void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, const sp_digit* m * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +static void sp_521_mont_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r3, #0\n\t" "ldm %[a]!, {r8, r9, r10, r11}\n\t" @@ -114182,15 +114874,19 @@ static void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b "ldm %[r], {r4}\n\t" "sbcs r4, r4, #0\n\t" "stm %[r]!, {r4}\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); + (void)m_p; } #define sp_521_mont_sub_lower_17 sp_521_mont_sub_17 -static void sp_521_rshift1_17(sp_digit* r, const sp_digit* a) +static void sp_521_rshift1_17(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "ldm %[a], {r2, r3}\n\t" "lsr r2, r2, #1\n\t" @@ -118441,8 +119137,10 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, * * a A single precision integer. */ -static void sp_521_add_one_17(sp_digit* a) +static void sp_521_add_one_17(sp_digit* a_p) { + register sp_digit* a asm ("r0") = a_p; + __asm__ __volatile__ ( "ldm %[a], {r1, r2, r3, r4}\n\t" "adds r1, r1, #1\n\t" @@ -118845,8 +119543,12 @@ int sp_ecc_secret_gen_521_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, #endif /* HAVE_ECC_DHE */ #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) -static void sp_521_rshift_17(sp_digit* r, const sp_digit* a, byte n) +static void sp_521_rshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register byte n asm ("r2") = n_p; + __asm__ __volatile__ ( "rsb r12, %[n], #32\n\t" #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) @@ -118950,8 +119652,12 @@ static void sp_521_rshift_17(sp_digit* r, const sp_digit* a, byte n) #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) #endif #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY) -static void sp_521_lshift_17(sp_digit* r, const sp_digit* a, byte n) +static void sp_521_lshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register byte n asm ("r2") = n_p; + __asm__ __volatile__ ( "rsb r12, %[n], #31\n\t" "ldr r5, [%[a], #64]\n\t" @@ -119062,8 +119768,12 @@ static void sp_521_lshift_17(sp_digit* r, const sp_digit* a, byte n) ); } -static void sp_521_lshift_34(sp_digit* r, const sp_digit* a, byte n) +static void sp_521_lshift_34(sp_digit* r_p, const sp_digit* a_p, byte n_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register byte n asm ("r2") = n_p; + __asm__ __volatile__ ( "rsb r12, %[n], #31\n\t" "ldr r5, [%[a], #132]\n\t" @@ -119282,8 +119992,11 @@ static void sp_521_lshift_34(sp_digit* r, const sp_digit* a, byte n) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) +static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" "mov r12, #0\n\t" @@ -119320,8 +120033,11 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) +static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -119371,8 +120087,12 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision digit. */ -static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -119466,8 +120186,12 @@ static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) * a A single precision integer. * b A single precision digit. */ -static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -120150,8 +120874,12 @@ static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr r6, %[div], #16\n\t" "add lr, r6, #1\n\t" @@ -120205,8 +120933,12 @@ static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr lr, %[div], #1\n\t" "add lr, lr, #1\n\t" @@ -120977,8 +121709,12 @@ int sp_ecc_sign_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W * a Number to divide. * m Modulus. */ -static void sp_521_div2_mod_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_521_div2_mod_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* m asm ("r2") = m_p; + __asm__ __volatile__ ( "ldr r4, [%[a]], #4\n\t" "ands r3, r4, #1\n\t" @@ -121147,8 +121883,10 @@ static const unsigned char L_sp_521_num_bits_17_table[] = { 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, }; -static int sp_521_num_bits_17(const sp_digit* a) +static int sp_521_num_bits_17(const sp_digit* a_p) { + register const sp_digit* a asm ("r0") = a_p; + __asm__ __volatile__ ( "mov lr, %[L_sp_521_num_bits_17_table]\n\t" "ldr r1, [%[a], #64]\n\t" @@ -122034,8 +122772,10 @@ static int sp_521_num_bits_17(const sp_digit* a) } #else -static int sp_521_num_bits_17(const sp_digit* a) +static int sp_521_num_bits_17(const sp_digit* a_p) { + register const sp_digit* a asm ("r0") = a_p; + __asm__ __volatile__ ( "ldr r1, [%[a], #64]\n\t" "cmp r1, #0\n\t" @@ -123275,8 +124015,12 @@ typedef struct sp_point_1024 { * a A single precision integer. * b A single precision integer. */ -static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_1024_mul_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #0x40\n\t" "mov r10, #0\n\t" @@ -133031,8 +133775,11 @@ static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. */ -static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) +static void sp_1024_sqr_16(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #0x40\n\t" /* A[0] * A[0] */ @@ -138261,8 +139008,12 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_1024_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" @@ -138306,8 +139057,11 @@ static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b * a A single precision integer and result. * b A single precision integer. */ -static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) +static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -138379,8 +139133,12 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" @@ -138522,8 +139280,12 @@ SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_1024_sub_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -138604,8 +139366,12 @@ SP_NOINLINE static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +static void sp_1024_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "sub sp, sp, #0x100\n\t" "mov r5, #0\n\t" @@ -138690,8 +139456,11 @@ static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) * r A single precision integer. * a A single precision integer. */ -static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) +static void sp_1024_sqr_32(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "sub sp, sp, #0x100\n\t" "mov r12, #0\n\t" @@ -138914,8 +139683,11 @@ static const sp_point_1024 p1024_base = { * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) +static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" "mov r12, #0\n\t" @@ -138951,8 +139723,13 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r6, #0\n\t" "mov r12, #0\n\t" @@ -138986,8 +139763,13 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" @@ -139118,8 +139900,12 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #0x80\n\t" @@ -139153,8 +139939,12 @@ static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b * a A single precision integer. * b A single precision digit. */ -static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -139248,8 +140038,12 @@ static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) * a A single precision integer. * b A single precision digit. */ -static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register sp_digit b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r10, #0\n\t" /* A[0] * B */ @@ -140532,8 +141326,12 @@ static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr r6, %[div], #16\n\t" "add lr, r6, #1\n\t" @@ -140587,8 +141385,12 @@ static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) * * Note that this is an approximate div. It may give an answer 1 larger. */ -static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) +static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) { + register sp_digit d1 asm ("r0") = d1_p; + register sp_digit d0 asm ("r1") = d0_p; + register sp_digit div asm ("r2") = div_p; + __asm__ __volatile__ ( "lsr lr, %[div], #1\n\t" "add lr, lr, #1\n\t" @@ -140749,8 +141551,11 @@ static void sp_1024_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_int32 sp_1024_cmp_32(const sp_digit* a, const sp_digit* b) +static sp_int32 sp_1024_cmp_32(const sp_digit* a_p, const sp_digit* b_p) { + register const sp_digit* a asm ("r0") = a_p; + register const sp_digit* b asm ("r1") = b_p; + __asm__ __volatile__ ( "mov r2, #-1\n\t" "mov r6, #1\n\t" @@ -141468,8 +142273,12 @@ static int sp_1024_point_to_ecc_point_32(const sp_point_1024* p, ecc_point* pm) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) { + register sp_digit* a asm ("r0") = a_p; + register const sp_digit* m asm ("r1") = m_p; + register sp_digit mp asm ("r2") = mp_p; + __asm__ __volatile__ ( #if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r11, [%[m]]\n\t" @@ -142777,8 +143586,13 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +static void sp_1024_mont_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register const sp_digit* m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" @@ -142944,8 +143758,12 @@ static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, const sp_digit* * a Number to double in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_1024_mont_dbl_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* m asm ("r2") = m_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" @@ -143095,8 +143913,12 @@ static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, const sp_digit* * a Number to triple in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_1024_mont_tpl_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* m asm ("r2") = m_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" @@ -143402,8 +144224,13 @@ static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, const sp_digit* * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) +static void sp_1024_mont_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register const sp_digit* m asm ("r3") = m_p; + __asm__ __volatile__ ( "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" @@ -143567,8 +144394,13 @@ static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov lr, #0\n\t" "mov r6, #0\n\t" @@ -143602,8 +144434,13 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_dig * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + register sp_digit m asm ("r3") = m_p; + __asm__ __volatile__ ( "mov r8, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" @@ -143727,8 +144564,11 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_dig } #endif /* WOLFSSL_SP_SMALL */ -static void sp_1024_rshift1_32(sp_digit* r, const sp_digit* a) +static void sp_1024_rshift1_32(sp_digit* r_p, const sp_digit* a_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + __asm__ __volatile__ ( "ldm %[a], {r2, r3}\n\t" "lsr r2, r2, #1\n\t" @@ -144080,8 +144920,12 @@ static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_1024_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x80\n\t" @@ -144113,8 +144957,12 @@ static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +static sp_digit sp_1024_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) { + register sp_digit* r asm ("r0") = r_p; + register const sp_digit* a asm ("r1") = a_p; + register const sp_digit* b asm ("r2") = b_p; + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t"